summaryrefslogtreecommitdiff
path: root/ipl/progs/grpsort.icn
blob: 4ea4f34250e8c4a2f4b5a122be5b1a07f6960de2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
############################################################################
#
#	File:     grpsort.icn
#
#	Subject:  Program to sort groups of lines
#
#	Author:   Thomas R. Hicks
#
#	Date:     June 10, 1988
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#  
#     This program sorts input containing ``records'' defined to be
#  groups of consecutive lines. Output is written to standard out-
#  put.  Each input record is separated by one or more repetitions
#  of a demarcation line (a line beginning with the separator
#  string).  The first line of each record is used as the key.
#  
#     If no separator string is specified on the command line, the
#  default is the empty string. Because all input lines are trimmed
#  of whitespace (blanks and tabs), empty lines are default demarca-
#  tion lines. The separator string specified can be an initial sub-
#  string of the string used to demarcate lines, in which case the
#  resulting partition of the input file may be different from a
#  partition created using the entire demarcation string.
#  
#     The -o option sorts the input file but does not produce the
#  sorted records.  Instead it lists the keys (in sorted order) and
#  line numbers defining the extent of the record associated with
#  each key.
#  
#     The use of grpsort is illustrated by the following examples.
#  The command
#  
#          grpsort "catscats" <x >y
#  
#  sorts the file x, whose records are separated by lines containing
#  the string "catscats", into the file y placing a single line of
#  "catscats" between each output record. Similarly, the command
#  
#          grpsort "cats" <x >y
#  
#  sorts the file x as before but assumes that any line beginning
#  with the string "cats" delimits a new record. This may or may not
#  divide the lines of the input file into a number of records dif-
#  ferent from the previous example.  In any case, the output
#  records will be separated by a single line of "cats".  Another
#  example is
#  
#          grpsort -o <bibliography >bibkeys
#  
#  which sorts the file bibliography and produces a sorted list of
#  the keys and the extents of the associated records in bibkeys.
#  Each output key line is of the form:
#  
#          [s-e] key
#  
#  where
#  
#          s     is the line number of the key line
#          e     is the line number of the last line
#          key   is the actual key of the record
#  
#  
############################################################################
#
#  Links: usage
#
############################################################################

link usage

global lcount, linelst, ordflag

procedure main(args)
   local division, keytable, keylist, line, info, nexthdr, null
   linelst := []
   keytable := table()
   lcount := 0

   if *args = 2 then
      if args[1] == "-o" then
          ordflag := pop(args)
      else
          Usage("groupsort [-o] [separator string] <file >sortedfile")

   if *args = 1 then {
      if args[1] == "?" then
          Usage("groupsort [-o] [separator string] <file >sortedfile")
      if args[1] == "-o" then
          ordflag := pop(args)
      else
          division := args[1]
      }

   if *args = 0 then
      division := ""

   nexthdr := lmany(division) | fail	# find at least one record or quit
   info := [nexthdr,[lcount]]

   # gather all data lines for this group/record
   while line := getline() do {
      if eorec(division,line) then {	# at end of this record
          # enter record info into sort key table
          put(info[2],lcount-1)
          enter(info,keytable)
          # look for header of next record
          if nexthdr := lmany(division) then
      	info := [nexthdr,[lcount]] # begin next group/record
          else
      	info := null
          }
      }
   # enter last line info into sort key table
   if \info then {
      put(info[2],lcount)
      enter(info,keytable)
      }

   keylist := sort(keytable,1)		# sort by record headers
   if \ordflag then
      printord(keylist)		# list sorted order of records
   else
      printrecs(keylist,division)	# print records in order
end

# enter - enter the group info into the sort key table
procedure enter(info,tbl)
   if /tbl[info[1]] then		# new key value
      tbl[info[1]] := [info[2]]
   else
      put(tbl[info[1]],info[2])	# add occurrance info
end

# eorec - suceed if a delimiter string has been found, fail otherwise
procedure eorec(div,str)
   if div == "" then			# If delimiter string is empty,
      if str == div then return	# then make exact match
      else
          fail
   if match(div,str) then return	# Otherwise match initial string.
   else
      fail
end

# getline - get the next line (or fail), trim off trailing tabs and blanks.
procedure getline()
   local line
   static trimset
   initial trimset := ' \t'
   if line := trim(read(),trimset) then {
      if /ordflag then	# save only if going to print later
          put(linelst,line)
      lcount +:= 1
      return line
      }
end

# lmany - skip over many lines matching string div.
procedure lmany(div)
   local line
   while line := getline() do {
      if eorec(div,line) then next	#skip over multiple dividing lines
      return line
      }
end

# printord - print only the selection order of the records.
procedure printord(slist)
   local x, y
   every x := !slist do
      every y := !x[2] do
          write(y[1],"-",y[2],"\t",x[1])
end

# printrecs - write the records in sorted order, separated by div string.
procedure printrecs(slist,div)
   local x, y, z
   every x := !slist do 
      every y := !x[2] do {
          every z := y[1] to y[2] do
      	write(linelst[z])
          write(div)
          }
end