1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
|
############################################################################
#
# File: grpsort.icn
#
# Subject: Program to sort groups of lines
#
# Author: Thomas R. Hicks
#
# Date: June 10, 1988
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# This program sorts input containing ``records'' defined to be
# groups of consecutive lines. Output is written to standard out-
# put. Each input record is separated by one or more repetitions
# of a demarcation line (a line beginning with the separator
# string). The first line of each record is used as the key.
#
# If no separator string is specified on the command line, the
# default is the empty string. Because all input lines are trimmed
# of whitespace (blanks and tabs), empty lines are default demarca-
# tion lines. The separator string specified can be an initial sub-
# string of the string used to demarcate lines, in which case the
# resulting partition of the input file may be different from a
# partition created using the entire demarcation string.
#
# The -o option sorts the input file but does not produce the
# sorted records. Instead it lists the keys (in sorted order) and
# line numbers defining the extent of the record associated with
# each key.
#
# The use of grpsort is illustrated by the following examples.
# The command
#
# grpsort "catscats" <x >y
#
# sorts the file x, whose records are separated by lines containing
# the string "catscats", into the file y placing a single line of
# "catscats" between each output record. Similarly, the command
#
# grpsort "cats" <x >y
#
# sorts the file x as before but assumes that any line beginning
# with the string "cats" delimits a new record. This may or may not
# divide the lines of the input file into a number of records dif-
# ferent from the previous example. In any case, the output
# records will be separated by a single line of "cats". Another
# example is
#
# grpsort -o <bibliography >bibkeys
#
# which sorts the file bibliography and produces a sorted list of
# the keys and the extents of the associated records in bibkeys.
# Each output key line is of the form:
#
# [s-e] key
#
# where
#
# s is the line number of the key line
# e is the line number of the last line
# key is the actual key of the record
#
#
############################################################################
#
# Links: usage
#
############################################################################
link usage
global lcount, linelst, ordflag
procedure main(args)
local division, keytable, keylist, line, info, nexthdr, null
linelst := []
keytable := table()
lcount := 0
if *args = 2 then
if args[1] == "-o" then
ordflag := pop(args)
else
Usage("groupsort [-o] [separator string] <file >sortedfile")
if *args = 1 then {
if args[1] == "?" then
Usage("groupsort [-o] [separator string] <file >sortedfile")
if args[1] == "-o" then
ordflag := pop(args)
else
division := args[1]
}
if *args = 0 then
division := ""
nexthdr := lmany(division) | fail # find at least one record or quit
info := [nexthdr,[lcount]]
# gather all data lines for this group/record
while line := getline() do {
if eorec(division,line) then { # at end of this record
# enter record info into sort key table
put(info[2],lcount-1)
enter(info,keytable)
# look for header of next record
if nexthdr := lmany(division) then
info := [nexthdr,[lcount]] # begin next group/record
else
info := null
}
}
# enter last line info into sort key table
if \info then {
put(info[2],lcount)
enter(info,keytable)
}
keylist := sort(keytable,1) # sort by record headers
if \ordflag then
printord(keylist) # list sorted order of records
else
printrecs(keylist,division) # print records in order
end
# enter - enter the group info into the sort key table
procedure enter(info,tbl)
if /tbl[info[1]] then # new key value
tbl[info[1]] := [info[2]]
else
put(tbl[info[1]],info[2]) # add occurrance info
end
# eorec - suceed if a delimiter string has been found, fail otherwise
procedure eorec(div,str)
if div == "" then # If delimiter string is empty,
if str == div then return # then make exact match
else
fail
if match(div,str) then return # Otherwise match initial string.
else
fail
end
# getline - get the next line (or fail), trim off trailing tabs and blanks.
procedure getline()
local line
static trimset
initial trimset := ' \t'
if line := trim(read(),trimset) then {
if /ordflag then # save only if going to print later
put(linelst,line)
lcount +:= 1
return line
}
end
# lmany - skip over many lines matching string div.
procedure lmany(div)
local line
while line := getline() do {
if eorec(div,line) then next #skip over multiple dividing lines
return line
}
end
# printord - print only the selection order of the records.
procedure printord(slist)
local x, y
every x := !slist do
every y := !x[2] do
write(y[1],"-",y[2],"\t",x[1])
end
# printrecs - write the records in sorted order, separated by div string.
procedure printrecs(slist,div)
local x, y, z
every x := !slist do
every y := !x[2] do {
every z := y[1] to y[2] do
write(linelst[z])
write(div)
}
end
|