diff options
Diffstat (limited to 'combine')
-rwxr-xr-x | combine | 167 |
1 files changed, 167 insertions, 0 deletions
@@ -0,0 +1,167 @@ +#!/usr/bin/perl + +=head1 NAME + +combine - combine sets of lines from two files using boolean operations + +=head1 SYNOPSIS + +combine file1 and file2 + +combine file1 not file2 + +combine file1 or file2 + +combine file1 xor file2 + +_ file1 and file2 _ + +_ file1 not file2 _ + +_ file1 or file2 _ + +_ file1 xor file2 _ + +=head1 DESCRIPTION + +B<combine> combines the lines in two files. Depending on the boolean +operation specified, the contents will be combined in different ways: + +=over 4 + +=item and + +Outputs lines that are in file1 if they are also present in file2. + +=item not + +Outputs lines that are in file1 but not in file2. + +=item or + +Outputs lines that are in file1 or file2. + +=item xor + +Outputs lines that are in either file1 or file2, but not in both files. + +=back + +"-" can be specified for either file to read stdin for that file. + +The input files need not be sorted, and the lines are output in the order +they occur in file1 (followed by the order they occur in file2 for the two +"or" operations). Bear in mind that this means that the operations are not +commutative; "a and b" will not necessarily be the same as "b and a". To +obtain commutative behavior sort and uniq the result. + +Note that this program can be installed as "_" to allow for the syntactic +sugar shown in the latter half of the synopsis (similar to the test/[ +command). It is not currently installed as "_" by default, but you can +alias it to that if you like. + +=head1 SEE ALSO + +join(1) + +=head1 AUTHOR + +Copyright 2006 by Joey Hess <joey@kitenet.net> + +Licensed under the GNU GPL. + +=cut + +use warnings; +use strict; + +sub filemap { + my $file=shift; + my $sub=shift; + + open (IN, $file) || die "$file: $!\n"; + while (<IN>) { + chomp; + $sub->(); + } + close IN; +} + +sub hashify { + my $file=shift; + + my %seen; + filemap $file, sub { $seen{$_}++ }; + return \%seen; +} + +sub compare_or { + my ($file1, $file2) = @_; + + filemap $file1, sub { print "$_\n" }; + filemap $file2, sub { print "$_\n" }; +} + +sub compare_xor { + my ($file1, $file2) = @_; + + my (@lines2, %seen2); + filemap $file2, + sub { + push @lines2, $_; + $seen2{$_} = 1; + }; + + # Print all lines in file1 that are not in file2, + # and mark lines that are in both files by setting + # their value in %seen2 to 0. + filemap $file1, + sub { + if (exists $seen2{$_}) { + $seen2{$_} = 0; + } + else { + print "$_\n"; + } + }; + + # Print all lines that are in file2 but not in file1. + # The value of these lines in seen2 is set to 1. + foreach (@lines2) { + print "$_\n" if $seen2{$_}; + } +} + +sub compare_not { + my ($file1, $file2) = @_; + + my $seen=hashify($file2); + filemap $file1, sub { print "$_\n" unless $seen->{$_} }; +} + +sub compare_and { + my ($file1, $file2) = @_; + + my $seen=hashify($file2); + filemap $file1, sub { print "$_\n" if $seen->{$_} }; +} + +if (@ARGV >= 4 && $ARGV[3] eq "_") { + delete $ARGV[3]; +} + +if (@ARGV != 3) { + die "Usage: combine file1 OP file2\n"; +} + +my $file1=shift; +my $op=lc shift; +my $file2=shift; + +if ($::{"compare_$op"}) { + no strict 'refs'; + "compare_$op"->($file1, $file2); +} +else { + die "unknown operation, $op\n"; +} |