summaryrefslogtreecommitdiff
path: root/ipl/procs/gedcom.icn
diff options
context:
space:
mode:
Diffstat (limited to 'ipl/procs/gedcom.icn')
-rw-r--r--ipl/procs/gedcom.icn417
1 files changed, 417 insertions, 0 deletions
diff --git a/ipl/procs/gedcom.icn b/ipl/procs/gedcom.icn
new file mode 100644
index 0000000..f2524da
--- /dev/null
+++ b/ipl/procs/gedcom.icn
@@ -0,0 +1,417 @@
+############################################################################
+#
+# File: gedcom.icn
+#
+# Subject: Procedures for reading GEDCOM files
+#
+# Author: Gregg M. Townsend
+#
+# Date: March 25, 2002
+#
+############################################################################
+#
+# This file is in the public domain.
+#
+############################################################################
+#
+# These procedures read and interpret GEDCOM files, a standard
+# format for genealogy databases.
+#
+############################################################################
+#
+# gedload(f) loads GEDCOM data from file f and returns a gedcom
+# record containing the following fields:
+# tree root of tree of gednode records
+# id table of labeled nodes, indexed by @ID@
+# fam list of FAM nodes (marriages)
+# ind list of INDI nodes (individuals)
+#
+# The tree is composed of gednode records R containing these fields:
+# level level
+# id ID (label), including @...@ delimiters
+# tag tag
+# data data
+# lnum line number
+# parent parent node in tree
+# ref referenced node, if any
+# sub sub-entry list
+# hcode unique hashcode, if INDI node
+#
+# gedwalk(tree) generates the nodes of the tree in preorder.
+#
+# Three procedures find descendants of a node based on a sequence
+# of identifying tag strings:
+# gedsub(R, tag...) generates subnodes specified by tag sequence
+# gedval(R, tag...) generates data values of those subnodes
+# gedref(R, tag...) generates nodes referenced by those subnodes
+#
+# Three procedures extract a person's name from an INDI record:
+# gedfnf(R) produces "John Quincy Adams" form
+# gedlnf(R) produces "Adams, John Quincy" form
+# gednmf(R,f) produces an arbitrary format, substituting
+# prefix, firstname, lastname, suffix for
+# "P", "F", "L", "S" (respectively) in f
+#
+# geddate(R) finds the DATE subnode of a node and returns a string
+# of at least 12 characters in a standard form such as "11 Jul 1767"
+# or "abt 1810". It is assumed that the input is in English.
+#
+# gedyear(R) returns the year from the DATE subnode of a node.
+#
+# gedfind(g,s) generates the individuals under gedcom record g
+# that are named by s, a string of whitespace-separated words.
+# gedfind() generates each INDI node for which every word of s
+# is matched by either a word of the individual's name or by
+# the birth year. Matching is case-insensitive.
+#
+############################################################################
+
+record gedcom(
+ tree, # tree of data records
+ id, # table of labeled nodes, indexed by @ID@
+ fam, # list of FAM nodes
+ ind # list of INDI nodes
+)
+
+record gednode(
+ level, # level
+ id, # ID (label), including @...@ delimiters
+ tag, # tag
+ data, # data
+ lnum, # line number
+ parent, # parent node in tree
+ ref, # referenced node, if any
+ sub, # sub-entry list
+ hcode # hashcode, if INDI node
+)
+
+$define WHITESPACE ' \t\n\r'
+
+
+
+# gedload(f) -- load GEDCOM data from file f, returning gedcom record.
+
+procedure gedload(f) #: load GEDCOM data from file f
+ local line, lnum, r, curr
+ local root, id, fam, ind
+ local hset, h1, h2, c
+
+ lnum := 0
+ root := curr := gednode(-1, , "ROOT", "", lnum, , , [])
+ id := table()
+ fam := []
+ ind := []
+
+ while line := read(f) do {
+ lnum +:= 1
+ if *line = 0 then
+ next
+
+ if not (r := gedscan(line)) then {
+ write(&errout, "ERR, line ", lnum, ": ", line)
+ next
+ }
+ r.lnum := lnum
+ r.sub := []
+
+ if r.tag == "CONC" then { # continuation line (no \n)
+ curr.data ||:= r.data
+ next
+ }
+ if r.tag == "CONT" then { # continuation line (with \n)
+ curr.data ||:= "\n" || r.data
+ next
+ }
+
+ while curr.level >= r.level do
+ curr := curr.parent
+ put(curr.sub, r)
+ r.parent := curr
+ curr := r
+
+ id[\r.id] := r
+ case r.tag of {
+ "FAM": put(fam, r)
+ "INDI": put(ind, r)
+ }
+ }
+
+ every r := gedwalk(root) do
+ r.ref := id[r.data]
+
+ hset := set()
+ every r := !ind do {
+ h1 := h2 := gedhi(r)
+ every c := !"123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" do
+ if member(hset, h2) then
+ h2 := h1 || c # add disambiguating suffix if needed
+ else
+ break
+ insert(hset, r.hcode := h2)
+ }
+
+ return gedcom(root, id, fam, ind)
+end
+
+
+
+# gedscan(f) -- scan one line of a GEDCOM record, returning gednode record
+
+procedure gedscan(s) # (internal procedure)
+ local level, id, tag, data
+ static alnum
+ initial alnum := &letters ++ &digits ++ '_'
+
+ s ? {
+ tab(many(WHITESPACE))
+ level := tab(many(&digits)) | fail
+ tab(many(WHITESPACE))
+ if id := (="@" || tab(upto('@') + 1)) then
+ tab(many(WHITESPACE))
+ tag := tab(many(alnum)) | fail
+ tab(many(WHITESPACE))
+ data := tab(0)
+ return gednode(level, id, tag, data)
+ }
+end
+
+
+
+# gedwalk(r) -- walk GEDCOM tree, generating nodes in preorder
+
+procedure gedwalk(r) #: generate GEDCOM tree nodes in preorder
+ suspend r | gedwalk(!r.sub)
+ fail
+end
+
+
+
+# gedsub(r, field...) -- generate subrecords with given tags
+# gedval(r, field...) -- generate values of subrecords with given tags
+# gedref(r, field...) -- generate nodes referenced by given tags
+
+procedure gedsub(r, f[]) #: find subrecords
+ local tag, x
+
+ tag := get(f) | fail
+ every x := !r.sub do {
+ if x.tag == tag then
+ if *f > 0 then
+ suspend gedsub ! push(f, x)
+ else
+ suspend x
+ }
+end
+
+procedure gedval(a[]) #: find subrecord values
+ suspend (gedsub ! a).data
+end
+
+procedure gedref(a[]) #: find referenced nodes
+ suspend \(gedsub ! a).ref
+end
+
+
+
+# gedfnf(r) -- get name from individual record, first name first
+
+procedure gedfnf(r) #: get first name first
+ return gednmf(r, "P F L S")
+end
+
+
+
+# gedlnf(r) -- get name from individual record, last name first
+
+procedure gedlnf(r) #: get last name first
+ local s
+ s := gednmf(r, "L, P F S")
+ s ? {
+ =", "
+ return tab(0)
+ }
+end
+
+
+
+# gednmf(r, f) -- general name formatter
+#
+# substitutes the first name, last name, prefix, and suffix
+# for the letters F, L, P, S respectively in string f.
+# multiple spaces are suppressed.
+
+procedure gednmf(r, f) #: format name
+ local c, s, prefix, first, last, suffix
+
+ prefix := gedval(r, "TITL" | "NPFX") | gedval(r, "NAME", "NPFX")
+ s := gedval(r, "NAME") | fail
+ s ? {
+ first := trim(tab(upto('/') | 0))
+ ="/"
+ last := trim(tab(upto('/') | 0))
+ ="/"
+ suffix := gedval(r, "NSFX") | ("" ~== tab(0))
+ }
+ s := ""
+ f ? {
+ while s ||:= tab(upto('PFLS ')) do {
+ while c := tab(any('PFLS ')) do {
+ s ||:= case c of {
+ "P": \prefix
+ "F": \first
+ "L": \last
+ "S": \suffix
+ " ": s[-1] ~== " "
+ }
+ }
+ }
+ s ||:= tab(0)
+ }
+ return trim(s)
+end
+
+
+
+# geddate(r) -- get date from record in standard form
+
+procedure geddate(r) #: get canonical date
+ local s, t, w
+ static ftab
+ initial {
+ ftab := table()
+ ftab["JAN"] := "Jan"; ftab["FEB"] := "Feb"; ftab["MAR"] := "Mar"
+ ftab["APR"] := "Apr"; ftab["MAY"] := "May"; ftab["JUN"] := "Jun"
+ ftab["JUL"] := "Jul"; ftab["AUG"] := "Aug"; ftab["SEP"] := "Sep"
+ ftab["OCT"] := "Oct"; ftab["NOV"] := "Nov"; ftab["DEC"] := "Dec"
+ ftab["ABT"] := "abt"; ftab["BEF"] := "bef"; ftab["AFT"] := "aft"
+ ftab["CAL"] := "cal"; ftab["EST"] := "est"
+ }
+
+ s := trim(gedval(r, "DATE"), WHITESPACE) | fail
+ t := ""
+
+ s ? while not pos(0) do {
+ tab(many(WHITESPACE))
+ w := tab(upto(WHITESPACE) | 0)
+ t ||:= " " || (\ftab[w] | w)
+ }
+
+ if *t > 13 then
+ return t[2:0]
+ else
+ return right(t, 12)
+end
+
+
+
+# gedyear(r) -- get year from event record
+
+procedure gedyear(r) #: get year
+ local d, y
+
+ d := gedval(r, "DATE") | fail
+ d ? while tab(upto(&digits)) do
+ if (y := tab(many(&digits)) \ 1) >= 1000 then
+ return y
+end
+
+
+
+# gedhi -- generate hashcode for individual record
+#
+# The hashcode uses two initials, final digits of birth year,
+# and a 3-letter hashing of the full name and birthdate fields.
+
+procedure gedhi(r) # (internal procedure)
+ local s, name, bdate, bd
+ static lc, uc
+ initial {
+ uc := string(&ucase)
+ lc := string(&lcase)
+ }
+
+ s := ""
+ name := gedval(r, "NAME") | ""
+ name ? {
+ # prefer initial of nickname; else skip unused firstname in parens
+ tab(upto('"') + 1) | (="(" & tab(upto(')') + 1))
+ tab(any(' \t'))
+ s ||:= tab(any(&letters)) | "X" # first initial
+ tab(upto('/') + 1)
+ tab(any(' \t'))
+ s ||:= tab(any(&letters)) | "X" # second initial
+ }
+
+ bdate := geddate(gedsub(r, "BIRT")) | ""
+ bd := bdate[-2:0] | "00"
+ if not (bd ? (tab(many(&digits)) & pos(0))) then
+ bd := "99"
+ s ||:= bd || gedh3a(name || bdate)
+ return map(s, lc, uc)
+end
+
+
+
+# gedh3a(s) -- hash arbitrary string into three alphabetic characters
+
+procedure gedh3a(s) # (internal procedure)
+ local n, d1, d2, d3, c
+
+ n := 0
+ every c := !map(s) do
+ if not upto(' \t\f\r\n', c) then
+ n := 37 * n + ord(c) - 32
+ d1 := 97 + (n / 676) % 26
+ d2 := 97 + (n / 26) % 26
+ d3 := 97 + n % 26
+ return char(d1) || char(d2) || char(d3)
+end
+
+
+
+# gedfind(g, s) -- find records by name from gedcom record
+#
+# g is a gedcom record; s is a string of whitespace-separated words.
+# gedfind() generates each INDI node for which every word of s
+# is matched by either a word of the individual's name or by
+# the birth year. Matching is case-insensitive.
+
+procedure gedfind(g, s) #: find individual by name
+ local r
+
+ every r := !g.ind do
+ if gedmatch(r, s) then
+ suspend r
+end
+
+
+# gedmatch(r, s) -- match record against name
+#
+# s is a string of words to match name field and/or birth year.
+# Matching is case sensitive.
+
+procedure gedmatch(r, s) # (internal procedure)
+ local w
+
+ every w := gedlcw(s) do
+ (w == (gedlcw(gedval(r, "NAME")) | gedyear(gedsub(r, "BIRT")))) | fail
+ return r
+end
+
+
+
+# gedlcw(s, c) -- generate words from string s separated by chars from c
+#
+# words are mapped to lower-case to allow case-insensitive comparisons
+
+procedure gedlcw(s, c) # (internal procedure)
+ /c := '/ \t\r\n\v\f'
+ map(s) ? {
+ tab(many(c))
+ while not pos(0) do {
+ suspend tab(upto(c) | 0) \ 1
+ tab(many(c))
+ }
+ }
+ fail
+end