summaryrefslogtreecommitdiff
path: root/ipl/progs/compare.icn
blob: 9356dad669d0e2b2095b842475a7d10216ef9540 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
############################################################################
#
#	File:     compare.icn
#
#	Subject:  Program to look for duplicates in a collection of files
#
#	Author:   Ralph E. Griswold
#
#	Date:     January 7, 1997
#
############################################################################
#
#  This file is in the public domain.
#
############################################################################
#
#  This program compares files to locate ones that have the same content.
#
#  The file names are given on the command line.
#
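#  Every pair of files that share the same size is compared, so the
#  running time grows quadratically with the number of same-size files
#  and can become impractical for large collections of such files.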
#
############################################################################
#
#  Requires:  UNIX
#
############################################################################

#
#  main(args) -- report pairs of files named in args that have identical
#  content.  Each matching pair is written to standard output as
#  "name1==name2".  Files whose size cannot be determined are reported
#  on standard error and skipped.
#
procedure main(args)
   local filesets, filelist, file, xfile, size

   filesets := table()

   #  The strategy is to divide the files into equivalence classes by size;
   #  only files of equal size can possibly have the same content.

   every file := !args do {
      if size := file_size(file) then {
         /filesets[size] := []
         put(filesets[size], file)
         }
      else write(&errout, "compare: cannot determine size of ", image(file))
      }

   #  sort(T, 3) yields a flat list alternating key, value, key, value, ...

   filesets := sort(filesets, 3)

   while get(filesets) do {			# discard the size; not needed
      filelist := get(filesets)			# just the files of that size
      while file := get(filelist) do		# for every file
         every xfile := !filelist do		# compare against the rest
            #  "--" keeps names that begin with "-" from being read as options.
            if system("cmp -s -- " || shell_quote(file) || " " ||
               shell_quote(xfile) || " >/dev/null") = 0 then
                  write(file, "==", xfile)
      }

end

#
#  file_size(name) -- return the size in bytes of the named file as
#  reported by "wc -c"; fail if the size cannot be obtained.
#
#  The file is supplied to wc on standard input so that the output holds
#  only the count, regardless of the column layout a particular wc uses
#  and regardless of what characters the file name contains.
#
procedure file_size(name)
   local input, line

   input := open("wc -c < " || shell_quote(name), "p") | fail
   line := read(input)				# fails if wc produced no output
   close(input)

   \line ? {
      tab(many(' \t'))				# some wc versions pad the count
      return integer(tab(many(&digits)))
      }

   fail

end

#
#  shell_quote(s) -- return s quoted so that the shell treats it as one
#  literal word.  The string is enclosed in single quotes, and every
#  embedded single quote is replaced by '\'' (close quote, escaped quote,
#  reopen quote).
#
procedure shell_quote(s)
   local quoted

   quoted := "'"
   s ? {
      while quoted ||:= tab(upto('\'')) do {
         quoted ||:= "'\\''"
         move(1)
         }
      quoted ||:= tab(0)
      }

   return quoted || "'"

end