diff options
Diffstat (limited to 'ipl/procs/gedcom.icn')
-rw-r--r-- | ipl/procs/gedcom.icn | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/ipl/procs/gedcom.icn b/ipl/procs/gedcom.icn new file mode 100644 index 0000000..f2524da --- /dev/null +++ b/ipl/procs/gedcom.icn @@ -0,0 +1,417 @@ +############################################################################ +# +# File: gedcom.icn +# +# Subject: Procedures for reading GEDCOM files +# +# Author: Gregg M. Townsend +# +# Date: March 25, 2002 +# +############################################################################ +# +# This file is in the public domain. +# +############################################################################ +# +# These procedures read and interpret GEDCOM files, a standard +# format for genealogy databases. +# +############################################################################ +# +# gedload(f) loads GEDCOM data from file f and returns a gedcom +# record containing the following fields: +# tree root of tree of gednode records +# id table of labeled nodes, indexed by @ID@ +# fam list of FAM nodes (marriages) +# ind list of INDI nodes (individuals) +# +# The tree is composed of gednode records R containing these fields: +# level level +# id ID (label), including @...@ delimiters +# tag tag +# data data +# lnum line number +# parent parent node in tree +# ref referenced node, if any +# sub sub-entry list +# hcode unique hashcode, if INDI node +# +# gedwalk(tree) generates the nodes of the tree in preorder. +# +# Three procedures find descendants of a node based on a sequence +# of identifying tag strings: +# gedsub(R, tag...) generates subnodes specified by tag sequence +# gedval(R, tag...) generates data values of those subnodes +# gedref(R, tag...) generates nodes referenced by those subnodes +# +# Three procedures extract a person's name from an INDI record: +# gedfnf(R) produces "John Quincy Adams" form +# gedlnf(R) produces "Adams, John Quincy" form +# gednmf(R,f) produces an arbitrary format, substituting +# prefix, firstname, lastname, suffix for +# "P", "F", "L", "S" (respectively) in f +# +# geddate(R) finds the DATE subnode of a node and returns a string +# of at least 12 characters in a standard form such as "11 Jul 1767" +# or "abt 1810". It is assumed that the input is in English. +# +# gedyear(R) returns the year from the DATE subnode of a node. +# +# gedfind(g,s) generates the individuals under gedcom record g +# that are named by s, a string of whitespace-separated words. +# gedfind() generates each INDI node for which every word of s +# is matched by either a word of the individual's name or by +# the birth year. Matching is case-insensitive. +# +############################################################################ + +record gedcom( + tree, # tree of data records + id, # table of labeled nodes, indexed by @ID@ + fam, # list of FAM nodes + ind # list of INDI nodes +) + +record gednode( + level, # level + id, # ID (label), including @...@ delimiters + tag, # tag + data, # data + lnum, # line number + parent, # parent node in tree + ref, # referenced node, if any + sub, # sub-entry list + hcode # hashcode, if INDI node +) + +$define WHITESPACE ' \t\n\r' + + + +# gedload(f) -- load GEDCOM data from file f, returning gedcom record. + +procedure gedload(f) #: load GEDCOM data from file f + local line, lnum, r, curr + local root, id, fam, ind + local hset, h1, h2, c + + lnum := 0 + root := curr := gednode(-1, , "ROOT", "", lnum, , , []) + id := table() + fam := [] + ind := [] + + while line := read(f) do { + lnum +:= 1 + if *line = 0 then + next + + if not (r := gedscan(line)) then { + write(&errout, "ERR, line ", lnum, ": ", line) + next + } + r.lnum := lnum + r.sub := [] + + if r.tag == "CONC" then { # continuation line (no \n) + curr.data ||:= r.data + next + } + if r.tag == "CONT" then { # continuation line (with \n) + curr.data ||:= "\n" || r.data + next + } + + while curr.level >= r.level do + curr := curr.parent + put(curr.sub, r) + r.parent := curr + curr := r + + id[\r.id] := r + case r.tag of { + "FAM": put(fam, r) + "INDI": put(ind, r) + } + } + + every r := gedwalk(root) do + r.ref := id[r.data] + + hset := set() + every r := !ind do { + h1 := h2 := gedhi(r) + every c := !"123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" do + if member(hset, h2) then + h2 := h1 || c # add disambiguating suffix if needed + else + break + insert(hset, r.hcode := h2) + } + + return gedcom(root, id, fam, ind) +end + + + +# gedscan(f) -- scan one line of a GEDCOM record, returning gednode record + +procedure gedscan(s) # (internal procedure) + local level, id, tag, data + static alnum + initial alnum := &letters ++ &digits ++ '_' + + s ? { + tab(many(WHITESPACE)) + level := tab(many(&digits)) | fail + tab(many(WHITESPACE)) + if id := (="@" || tab(upto('@') + 1)) then + tab(many(WHITESPACE)) + tag := tab(many(alnum)) | fail + tab(many(WHITESPACE)) + data := tab(0) + return gednode(level, id, tag, data) + } +end + + + +# gedwalk(r) -- walk GEDCOM tree, generating nodes in preorder + +procedure gedwalk(r) #: generate GEDCOM tree nodes in preorder + suspend r | gedwalk(!r.sub) + fail +end + + + +# gedsub(r, field...) -- generate subrecords with given tags +# gedval(r, field...) -- generate values of subrecords with given tags +# gedref(r, field...) -- generate nodes referenced by given tags + +procedure gedsub(r, f[]) #: find subrecords + local tag, x + + tag := get(f) | fail + every x := !r.sub do { + if x.tag == tag then + if *f > 0 then + suspend gedsub ! push(f, x) + else + suspend x + } +end + +procedure gedval(a[]) #: find subrecord values + suspend (gedsub ! a).data +end + +procedure gedref(a[]) #: find referenced nodes + suspend \(gedsub ! a).ref +end + + + +# gedfnf(r) -- get name from individual record, first name first + +procedure gedfnf(r) #: get first name first + return gednmf(r, "P F L S") +end + + + +# gedlnf(r) -- get name from individual record, last name first + +procedure gedlnf(r) #: get last name first + local s + s := gednmf(r, "L, P F S") + s ? { + =", " + return tab(0) + } +end + + + +# gednmf(r, f) -- general name formatter +# +# substitutes the first name, last name, prefix, and suffix +# for the letters F, L, P, S respectively in string f. +# multiple spaces are suppressed. + +procedure gednmf(r, f) #: format name + local c, s, prefix, first, last, suffix + + prefix := gedval(r, "TITL" | "NPFX") | gedval(r, "NAME", "NPFX") + s := gedval(r, "NAME") | fail + s ? { + first := trim(tab(upto('/') | 0)) + ="/" + last := trim(tab(upto('/') | 0)) + ="/" + suffix := gedval(r, "NSFX") | ("" ~== tab(0)) + } + s := "" + f ? { + while s ||:= tab(upto('PFLS ')) do { + while c := tab(any('PFLS ')) do { + s ||:= case c of { + "P": \prefix + "F": \first + "L": \last + "S": \suffix + " ": s[-1] ~== " " + } + } + } + s ||:= tab(0) + } + return trim(s) +end + + + +# geddate(r) -- get date from record in standard form + +procedure geddate(r) #: get canonical date + local s, t, w + static ftab + initial { + ftab := table() + ftab["JAN"] := "Jan"; ftab["FEB"] := "Feb"; ftab["MAR"] := "Mar" + ftab["APR"] := "Apr"; ftab["MAY"] := "May"; ftab["JUN"] := "Jun" + ftab["JUL"] := "Jul"; ftab["AUG"] := "Aug"; ftab["SEP"] := "Sep" + ftab["OCT"] := "Oct"; ftab["NOV"] := "Nov"; ftab["DEC"] := "Dec" + ftab["ABT"] := "abt"; ftab["BEF"] := "bef"; ftab["AFT"] := "aft" + ftab["CAL"] := "cal"; ftab["EST"] := "est" + } + + s := trim(gedval(r, "DATE"), WHITESPACE) | fail + t := "" + + s ? while not pos(0) do { + tab(many(WHITESPACE)) + w := tab(upto(WHITESPACE) | 0) + t ||:= " " || (\ftab[w] | w) + } + + if *t > 13 then + return t[2:0] + else + return right(t, 12) +end + + + +# gedyear(r) -- get year from event record + +procedure gedyear(r) #: get year + local d, y + + d := gedval(r, "DATE") | fail + d ? while tab(upto(&digits)) do + if (y := tab(many(&digits)) \ 1) >= 1000 then + return y +end + + + +# gedhi -- generate hashcode for individual record +# +# The hashcode uses two initials, final digits of birth year, +# and a 3-letter hashing of the full name and birthdate fields. + +procedure gedhi(r) # (internal procedure) + local s, name, bdate, bd + static lc, uc + initial { + uc := string(&ucase) + lc := string(&lcase) + } + + s := "" + name := gedval(r, "NAME") | "" + name ? { + # prefer initial of nickname; else skip unused firstname in parens + tab(upto('"') + 1) | (="(" & tab(upto(')') + 1)) + tab(any(' \t')) + s ||:= tab(any(&letters)) | "X" # first initial + tab(upto('/') + 1) + tab(any(' \t')) + s ||:= tab(any(&letters)) | "X" # second initial + } + + bdate := geddate(gedsub(r, "BIRT")) | "" + bd := bdate[-2:0] | "00" + if not (bd ? (tab(many(&digits)) & pos(0))) then + bd := "99" + s ||:= bd || gedh3a(name || bdate) + return map(s, lc, uc) +end + + + +# gedh3a(s) -- hash arbitrary string into three alphabetic characters + +procedure gedh3a(s) # (internal procedure) + local n, d1, d2, d3, c + + n := 0 + every c := !map(s) do + if not upto(' \t\f\r\n', c) then + n := 37 * n + ord(c) - 32 + d1 := 97 + (n / 676) % 26 + d2 := 97 + (n / 26) % 26 + d3 := 97 + n % 26 + return char(d1) || char(d2) || char(d3) +end + + + +# gedfind(g, s) -- find records by name from gedcom record +# +# g is a gedcom record; s is a string of whitespace-separated words. +# gedfind() generates each INDI node for which every word of s +# is matched by either a word of the individual's name or by +# the birth year. Matching is case-insensitive. + +procedure gedfind(g, s) #: find individual by name + local r + + every r := !g.ind do + if gedmatch(r, s) then + suspend r +end + + +# gedmatch(r, s) -- match record against name +# +# s is a string of words to match name field and/or birth year. +# Matching is case sensitive. + +procedure gedmatch(r, s) # (internal procedure) + local w + + every w := gedlcw(s) do + (w == (gedlcw(gedval(r, "NAME")) | gedyear(gedsub(r, "BIRT")))) | fail + return r +end + + + +# gedlcw(s, c) -- generate words from string s separated by chars from c +# +# words are mapped to lower-case to allow case-insensitive comparisons + +procedure gedlcw(s, c) # (internal procedure) + /c := '/ \t\r\n\v\f' + map(s) ? { + tab(many(c)) + while not pos(0) do { + suspend tab(upto(c) | 0) \ 1 + tab(many(c)) + } + } + fail +end |