summaryrefslogtreecommitdiff
path: root/combine
diff options
context:
space:
mode:
Diffstat (limited to 'combine')
-rwxr-xr-x combine 167
1 files changed, 167 insertions, 0 deletions
diff --git a/combine b/combine
new file mode 100755
index 0000000..99a4de6
--- /dev/null
+++ b/combine
@@ -0,0 +1,167 @@
+#!/usr/bin/perl
+
+=head1 NAME
+
+combine - combine sets of lines from two files using boolean operations
+
+=head1 SYNOPSIS
+
+combine file1 and file2
+
+combine file1 not file2
+
+combine file1 or file2
+
+combine file1 xor file2
+
+_ file1 and file2 _
+
+_ file1 not file2 _
+
+_ file1 or file2 _
+
+_ file1 xor file2 _
+
+=head1 DESCRIPTION
+
+B<combine> combines the lines in two files. Depending on the boolean
+operation specified, the contents will be combined in different ways:
+
+=over 4
+
+=item and
+
+Outputs lines that are in file1 if they are also present in file2.
+
+=item not
+
+Outputs lines that are in file1 but not in file2.
+
+=item or
+
+Outputs lines that are in file1 or file2. Both files are output in full,
+so a line that is present in both files is output more than once.
+
+=item xor
+
+Outputs lines that are in either file1 or file2, but not in both files.
+
+=back
+
+"-" can be specified for either file to read stdin for that file.
+
+The input files need not be sorted, and the lines are output in the order
+they occur in file1 (followed by the order they occur in file2 for the two
+"or" operations). Bear in mind that this means that the operations are not
+commutative; "a and b" will not necessarily be the same as "b and a". To
+obtain commutative behavior sort and uniq the result.
+
+Note that this program can be installed as "_" to allow for the syntactic
+sugar shown in the latter half of the synopsis (similar to the test/[
+command). It is not currently installed as "_" by default, but you can
+alias it to that if you like.
+
+=head1 SEE ALSO
+
+join(1)
+
+=head1 AUTHOR
+
+Copyright 2006 by Joey Hess <joey@kitenet.net>
+
+Licensed under the GNU GPL.
+
+=cut
+
+use warnings;
+use strict;
+
+# Call $sub once for every line of $file, with the chomped line in $_.
+#
+# $file is a path name, or "-" to read standard input. The file is opened
+# with three-argument open, so names with leading or trailing whitespace,
+# or containing characters such as "<", ">" or "|", are treated as literal
+# file names rather than as open modes or commands.
+#
+# Dies with "$file: $!" if the file cannot be opened.
+sub filemap {
+    my $file=shift;
+    my $sub=shift;
+
+    my $in;
+    if ($file eq '-') {
+        # Two-argument open provided this "-" convention implicitly;
+        # with three-argument open it has to be spelled out.
+        $in = \*STDIN;
+    }
+    else {
+        open ($in, '<', $file) || die "$file: $!\n";
+    }
+
+    # The callback reads the current line from $_; localize it so the
+    # caller's $_ is intact once the file has been read.
+    local $_;
+    while (<$in>) {
+        chomp;
+        $sub->();
+    }
+
+    # Leave standard input open; it is not ours to close.
+    close $in unless $file eq '-';
+}
+
+# Build a lookup table of the lines of $file.
+#
+# Returns a reference to a hash keyed by the (chomped) lines of the file;
+# each value is the number of times that line occurs.
+sub hashify {
+    my ($path) = @_;
+
+    my %count_of;
+    filemap($path, sub { $count_of{$_}++ });
+    return \%count_of;
+}
+
+# "or": print every line of $file1, then every line of $file2.
+#
+# No de-duplication is done, so a line present in both files is printed
+# once for each occurrence.
+sub compare_or {
+    my ($first, $second) = @_;
+
+    my $echo = sub { print "$_\n" };
+
+    filemap($first, $echo);
+    filemap($second, $echo);
+}
+
+# "xor": print the lines that occur in only one of the two files.
+#
+# Lines found only in $file1 are printed first, in $file1 order, followed
+# by lines found only in $file2, in $file2 order.
+sub compare_xor {
+    my ($file1, $file2) = @_;
+
+    # Remember file2's lines in order. Each one starts out flagged as
+    # "only in file2" (1); the flag is cleared below if file1 has it too.
+    my @second;
+    my %only_in_2;
+    filemap($file2, sub {
+        $only_in_2{$_} = 1;
+        push @second, $_;
+    });
+
+    # Lines unknown to file2 are printed straight away; lines known to
+    # both files are marked (0) so the final pass skips them.
+    filemap($file1, sub {
+        unless (exists $only_in_2{$_}) {
+            print "$_\n";
+            return;
+        }
+        $only_in_2{$_} = 0;
+    });
+
+    # Whatever is still flagged never appeared in file1.
+    print "$_\n" for grep { $only_in_2{$_} } @second;
+}
+
+# "not": print the lines of $file1 that do not occur in $file2,
+# in $file1 order.
+sub compare_not {
+    my ($file1, $file2) = @_;
+
+    my $in_file2 = hashify($file2);
+    filemap($file1, sub {
+        return if $in_file2->{$_};
+        print "$_\n";
+    });
+}
+
+# "and": print the lines of $file1 that also occur in $file2,
+# in $file1 order.
+sub compare_and {
+    my ($file1, $file2) = @_;
+
+    my $in_file2 = hashify($file2);
+    filemap($file1, sub {
+        return unless $in_file2->{$_};
+        print "$_\n";
+    });
+}
+
+# Command line handling: combine file1 OP file2
+#
+# When the program is invoked as "_", a closing "_" may follow the three
+# real arguments; drop it so both spellings are parsed the same way.
+# (pop is used rather than delete, which is discouraged on array elements.)
+if (@ARGV == 4 && $ARGV[3] eq "_") {
+    pop @ARGV;
+}
+
+if (@ARGV != 3) {
+    die "Usage: combine file1 OP file2\n";
+}
+
+my $file1=shift;
+my $op=lc shift;
+my $file2=shift;
+
+# Explicit dispatch table: only these operations can be reached from the
+# command line, and no symbolic reference is needed to call them.
+my %compare_for = (
+    and => \&compare_and,
+    not => \&compare_not,
+    or  => \&compare_or,
+    xor => \&compare_xor,
+);
+
+if (my $compare = $compare_for{$op}) {
+    $compare->($file1, $file2);
+}
+else {
+    die "unknown operation, $op\n";
+}