blob: 9356dad669d0e2b2095b842475a7d10216ef9540 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
############################################################################
#
# File: compare.icn
#
# Subject: Program to look for duplicates in a collection of files
#
# Author: Ralph E. Griswold
#
# Date: January 7, 1997
#
############################################################################
#
# This file is in the public domain.
#
############################################################################
#
# This program compares files to locate ones that have the same content.
#
# The file names are given on the command line.
#
# This program compares every pair of files that have the same size, so
# its running time grows quadratically if there are many files of the
# same size.
#
############################################################################
#
# Requires: UNIX
#
############################################################################
#  main(args) -- report pairs of files named in args that have identical
#  content.  For every such pair a line of the form  name1==name2  is
#  written to standard output.
#
#  Sizes are obtained from wc(1) and contents are compared with cmp(1),
#  both run through the shell.
procedure main(args)
   local filesets, filelist, file, xfile, size, line, input

   filesets := table()

   #  The strategy is to divide the files into equivalence classes by size,
   #  so that only files of equal size are ever compared byte by byte.

   every file := !args do {
      #  "wc -c < file" prints only the byte count, so the result does not
      #  depend on the column layout of a particular wc implementation, and
      #  the redirection keeps a leading "-" in the name from being taken
      #  as an option.
      input := open("wc -c < " || shquote(file), "p") |
         stop("cannot run wc for ", image(file))
      line := read(input) | ""		# no output: wc (or the shell) failed
      close(input)
      line ? {
         tab(many(' \t'))		# some versions pad the count
         size := integer(tab(many(&digits))) |
            stop("bogus size: ", image(file))
         }
      /filesets[size] := []
      put(filesets[size], file)
      }

   #  sort(T, 3) yields a flat list: size1, files1, size2, files2, ...

   filesets := sort(filesets, 3)

   while get(filesets) do {		# don't need size for anything
      filelist := get(filesets)		# just the files of that size
      while file := get(filelist) do	# for every file
         every xfile := !filelist do	# compare against the rest
            if system("cmp -s -- " || shquote(file) || " " ||
               shquote(xfile) || " >/dev/null") = 0 then
                  write(file, "==", xfile)
      }

end

#  shquote(s) -- return s wrapped in single quotes for use as one word in a
#  shell command line.  Each embedded single quote is rewritten as '\''
#  (close quote, escaped quote, reopen quote), so no character of s is
#  interpreted by the shell.
procedure shquote(s)
   local q

   q := "'"
   s ? {
      while q ||:= tab(upto('\'')) do {
         move(1)			# skip the quote itself
         q ||:= "'\\''"
         }
      q ||:= tab(0)
      }
   return q || "'"

end
|