summaryrefslogtreecommitdiff
path: root/ipl/progs/latexidx.icn
blob: cca1fa068f1401adec4895c3dec8fccfad493801 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
############################################################################
#
#	File:     latexidx.icn
#
#	Subject:  Program to process LaTeX idx file
#
#	Author:   David S. Cargo
#
#	Date:     April 19, 1989
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
#  Input:
#
#     A latex .idx file containing the \indexentry lines.
#
#  Output:
#
#     \item lines sorted in order by entry value,
#  with page references put into sorted order.
#
# Processing:
#
#     While lines are available from standard input
#         Read a line containing an \indexentry
#         Form a sort key for the indexentry
#         If there is no table entry for it
#         Then create a subtable for it and assign it an initial value
#         If there is a table entry for it,
#         But not a subtable entry for the actual indexentry
#         Then create an initial page number set for it
#         Otherwise add the page number to the corresponding page number set
#    Sort the table of subtables by sort key value
#    For all subtables in the sorted list
#         Sort the subtables by indexentry values
#         For all the indexentries in the resulting list
#             Sort the set of page references
#             Write an \item entry for each indexentry and the page references
#
#  Limitations:
#
#     Length of index handled depends on implementation limits of memory alloc.
#  Page numbers must be integers (no roman numerals).  Sort key formed by
#  mapping to lower case and removing leading articles (a separate function
#  is used to produce the sort key, simplifying customization) -- otherwise
#  sorting is done in ASCII order.
#
############################################################################

procedure main()                       # no parameters, reading from stdin
    local key_table, s, page_num, itemval, key, item_list, one_item
    local page_list, refs

    key_table := table()               # for items and tables of page sets
    while s := read() do               # read strings from standard input
        {
        # start with s = "\indexentry{item}{page}"
        # save what's between the opening brace and the closing brace,
        # and reverse it
        s := reverse(s[upto('{',s)+1:-1])
        # giving s = "egap{}meti"

        # reversing allows extracting the page number first, thereby allowing
        # ANYTHING to be in the item field

        # grab the "egap", reverse it, convert to integer, convert to set
        # in case of conversion failure, use 0 as the default page number
        page_num := set([integer(reverse(s[1:upto('{',s)])) | 0])

        # the reversed item starts after the first closing brace
        # grab the "meti", reverse it
        itemval := reverse(s[upto('}', s)+1:0])

        # allow the sort key to be different from the item
        # reform may be customized to produce different equivalence classes
        key := reform(itemval)

        # if the assigned value for the key is null
        if /key_table[key]
        then
            {
            # create a subtable for the key and give it its initial value
            key_table[key] := table()
            key_table[key][itemval] := page_num
            }

        # else if the assigned value for the itemval is null
        # (e. g., when the second itemval associated with a key is found)
        else if /key_table[key][itemval]

        # give it its initial value
        then key_table[key][itemval] := page_num

        # otherwise just add it to the existing page number set
        else key_table[key][itemval] ++:= page_num
        }

    # now that all the input has been read....
    # sort keys and subtables by key value
    key_table := sort(key_table, 3)

    # loop, discarding the sort keys
    while get(key_table) do
        {
        # dequeue and sort one subtable into a list
        # sort is strictly by ASCII order within the equivalence class
        item_list := sort(get(key_table), 3)

        # loop, consuming the item and the page number sets as we go
        while one_item := get(item_list) do
            {
            # convert the page number set into a sorted list
            page_list := sort(get(item_list))

            # dequeue first integer and convert to string
            refs := string(get(page_list))

            # dequeue rest of page nums and append
            while (refs ||:= ", " || string(get(page_list)))

            write("\\item ", one_item, " ", refs)
            }
        }
    return
end

# reform - modify the item to enforce sort order appropriately
# This could do much more. For example it could strip leading braces,
# control sequences, quotation marks, etc.  It doesn't.  Maybe later.
procedure reform(item)
   item := map(item)		# map to lowercase
# drop leading article if present
   if match("a ",   item) then return item[3:0]
   if match("an ",  item) then return item[4:0]
   if match("the ", item) then return item[5:0]
   return item
end