summaryrefslogtreecommitdiff
path: root/ipl/procs/dif.icn
blob: ea5713435b28fc51f4a606ba24194f48bf67adb9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
############################################################################
#
#	File:     dif.icn
#
#	Subject:  Procedure to check for differences
#
#	Author:   Robert J. Alexander
#
#	Date:     August 14, 1996
#
############################################################################
#
#   This file is in the public domain.
#
############################################################################
#
#	dif(stream, compare, eof, group)
#		generates a sequence of differences between an	arbitrary
#		number of input streams.  Each result is returned as a list
#		of diff_recs, one for each input stream, with each diff_rec
#		containing a list of items that differ and their position
#		in the input stream.
#
#  The diff_rec type is declared as:
#
#		record diff_rec(pos,diffs)
#
#  dif() fails if there are no differences, i.e. it produces an empty
#  result sequence.
#
############################################################################
#
#  For example, if two input streams are:
#
#	a b c d e f g h
#	a b d e f i j
#
#  the output sequence would be:
#
#	[diff_rec(3,[c]),diff_rec(3,[])]
#	[diff_rec(7,[g,h]),diff_rec(6,[i,j])
#
#  The arguments to dif(stream,compare,eof,group) are:
#
#	stream		A list of data objects that represent input streams
#			from which dif will extract its input "records".
#			The elements can be of several different types which
#			result in different actions, as follows:
#
#			   Type			   Action
#			===========	=============================
#			file		file is "read" to get records
#
#			co-expression	co-expression is activated to
#					get records
#
#			list		records are "gotten" (get()) from
#					the list
#
#			diff_proc	a record type defined in "dif" to
#					allow a procedure (or procedures)
#					suppled by dif's caller to be called
#					to get records.  Diff_proc has two
#					fields, the procedure to call and the
#					argument to call it with.  Its
#					definition looks like this:
#
#					   record diff_proc(proc,arg)
#			
#
#  Optional arguments:
#
#	compare		Item comparison procedure -- succeeds if
#			"equal", otherwise fails (default is the
#			identity "===" comparison).  The comparison
#			must allow for the fact that the eof object
#			(see next) might be an argument, and a pair of
#			eofs must compare equal.
#
#	eof		An object that is distinguishable from other
#			objects in the stream.  Default is &null.
#
#	group		A procedure that is called with the current number
#			of unmatched items as its argument.  It must
#			return the number of matching items required
#			for file synchronization to occur.  Default is
#			the formula Trunc((2.0 * Log(M)) + 2.0) where
#			M is the number of unmatched items.
#
############################################################################

invocable all

record diff_rec(pos,diffs)
record diff_proc(proc,arg)
record diff_file(stream,queue)


procedure dif(stream,compare,eof,group)
  local f,linenbr,line,difflist,gf,i,j,k,l,m,n,x,test,
	result,synclist,nsyncs,syncpoint
  #
  #  Provide default arguments and initialize data.
  #
  /compare := proc("===",2)
  /group := groupfactor
  f := []
  every put(f,diff_file(!stream,[]))
  linenbr := list(*stream,0)
  line := list(*stream)
  test := list(*stream)
  difflist := list(*stream)
  every !difflist := []
  #
  #  Loop to process all records of all input streams.
  #
  repeat {
    #
    #  This is the "idle loop" where we spin until we find a discrepancy
    #  among the data streams.  A line is read from each stream, with a
    #  check for eof on all streams.  Then the line from the first
    #  stream is compared to the lines from all the others.
    #
    repeat {
      every i := 1 to *stream do
        line[i] := diffread(f[i]) | eof
      if not (every x := !line do
        (x === eof) | break) then break break
      every !linenbr +:= 1
      if (every x := !line[2:0] do
        compare(x,line[1]) | break) then break
    }
    #
    #  Aha!  We have found a difference.  Create a difference list,
    #  one entry per stream, primed with the differing line we just found.
    #
    every i := 1 to *stream do
      difflist[i] := [line[i]]
    repeat {
      #
      #  Add a new input line from each stream to the difference list.
      #  Then build lists of the subset of different lines we need to
      #  actually compare.
      #
      every i := 1 to *stream do
        put(difflist[i],diffread(f[i]) | eof)
      gf := group(*difflist[1])
      every i := 1 to *stream do
        test[i] := difflist[i][-gf:0]
      #
      #  Create a "synchronization matrix", with a row and column for
      #  each input stream.  The entries will be initially &null, then
      #  will be set to the synchronization position if sync is
      #  achieved between the two streams.  Another list is created to
      #  keep track of how many syncs have been achieved for each stream.
      #
      j := *difflist[1] - gf + 1
      synclist := list(*stream)
      every !synclist := list(*stream)
      every k := 1 to *stream do
        synclist[k][k] := j
      nsyncs := list(*stream,1)
      #
      #  Loop through positions to start comparing lines.  This set of
      #  nested loops will be exited when a stream achieves sync with
      #  all other streams.
      #
      every i := 1 to j do {
        #
        #  Loop through all streams.
        #
        every k := 1 to *stream do {
          #
          #  Loop through all streams.
          #
	  every l := 1 to *stream do {
	    if /synclist[k][l] then {	# avoid unnecessary comparisons
	      #
              #  Compare items of the test list to the differences list
              #  at all possible positions.  If they compare, store the
              #  current position in the sync matrix and bump the count
              #  of streams sync'd to this stream.  If all streams are in
	      #  sync, exit all loops but the outer one.
	      #
	      m := i - 1
	      if not every n := 1 to gf do {
	        if not compare(test[k][n],difflist[l][m +:= 1]) then break
	      } then {
	        synclist[k][l] := i	# store current position
	        if (nsyncs[k] +:= 1) = *stream then break break break break
	      }
	    }
	  }
	}
      }
    }
    #
    #  Prepare an output set.  Since we have read the input streams past
    #  the point of synchronization, we must queue those lines before their
    #  input streams. 
    #
    synclist := synclist[k]
    result := list(*stream)
    every i := 1 to *stream do {
      j := synclist[i]
      while difflist[i][j -:= 1] === eof	# trim past eof
      result[i] := diff_rec(linenbr[i],difflist[i][1:j + 1])
      f[i].queue := difflist[i][synclist[i] + gf:0] ||| f[i].queue
      linenbr[i] +:= synclist[i] + gf - 2
      difflist[i] := []
    }
    suspend result
  }
end

#
#  diffread() -- Read a line from an input stream.
#
procedure diffread(f)
  local x
  return get(f.queue) | case type(x := f.stream) of {
    "file" | "window": read(x)
    "co-expression": @x
    "diff_proc": x.proc(x.arg)
    "list": get(x)
  }
end

#
#  groupfactor() -- Determine how many like lines we need to close
#  off a group of differences.  This is the default routine -- the
#  caller may provide his own.
#
procedure groupfactor(m)  # Compute: Trunc((2.0 * Log(m)) + 2.0)
  m := string(m)
  return 2 * *m + if m <<= "316227766"[1+:*m] then 0 else 1
end