summaryrefslogtreecommitdiff
path: root/sysutils
diff options
context:
space:
mode:
author minskim <minskim@pkgsrc.org> 2004-04-17 13:21:48 +0000
committer minskim <minskim@pkgsrc.org> 2004-04-17 13:21:48 +0000
commit f1fc027f963391e3e0ba17ecccd43dcf96fa0fac (patch)
tree 17ddd38a6feb8ec5cf58b5dddfec5d020c545360 /sysutils
parent 77a9023fa47d3d3dc6ccd0142fcd12770ea55dda (diff)
download pkgsrc-f1fc027f963391e3e0ba17ecccd43dcf96fa0fac.tar.gz
Import `same' from pkgsrc-wip. Packaged by Roland Illig.
The `same' utility looks for identical files and links them together using either hard links or symbolic links, saving disk space. Changes to the original version: * the file ownership and permissions are checked. (The original version checked only the file size) * the messages in the "dry-run" mode were suggesting that anything on the disk would be modified. They have been made clear.
Diffstat (limited to 'sysutils')
-rw-r--r-- sysutils/same/DESCR 8
-rw-r--r-- sysutils/same/Makefile 20
-rw-r--r-- sysutils/same/PLIST 3
-rw-r--r-- sysutils/same/distinfo 5
-rw-r--r-- sysutils/same/files/Makefile 15
-rw-r--r-- sysutils/same/files/same.1 114
-rw-r--r-- sysutils/same/patches/patch-ab 133
7 files changed, 298 insertions, 0 deletions
diff --git a/sysutils/same/DESCR b/sysutils/same/DESCR
new file mode 100644
index 00000000000..4c2c7924944
--- /dev/null
+++ b/sysutils/same/DESCR
@@ -0,0 +1,8 @@
+The `same' utility looks for identical files and links them together
+using either hard links or symbolic links, saving disk space.
+
+Changes to the original version:
+* the file ownership and permissions are checked. (The original
+ version checked only the file size)
+* the messages in the "dry-run" mode were suggesting that anything on
+ the disk would be modified. They have been made clear.
diff --git a/sysutils/same/Makefile b/sysutils/same/Makefile
new file mode 100644
index 00000000000..68a237dea63
--- /dev/null
+++ b/sysutils/same/Makefile
@@ -0,0 +1,20 @@
+# $NetBSD: Makefile,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $
+
+DISTNAME= same-1.3
+CATEGORIES= sysutils
+MASTER_SITES= ftp://ftp.bitwizard.nl/same/
+
+MAINTAINER= roland.illig@gmx.de
+HOMEPAGE= http://www.bitwizard.nl/
+COMMENT= Find identical files and link(2) them to save disk space
+
+PKG_INSTALLATION_TYPES= overwrite pkgviews
+
+MAKEFILE= ${FILESDIR}/Makefile
+
+INSTALLATION_DIRS= bin man/man1
+
+post-patch:
+ @${CP} ${FILESDIR}/same.1 ${WRKSRC}
+
+.include "../../mk/bsd.pkg.mk"
diff --git a/sysutils/same/PLIST b/sysutils/same/PLIST
new file mode 100644
index 00000000000..9bc7478c798
--- /dev/null
+++ b/sysutils/same/PLIST
@@ -0,0 +1,3 @@
+@comment $NetBSD: PLIST,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $
+bin/same
+man/man1/same.1
diff --git a/sysutils/same/distinfo b/sysutils/same/distinfo
new file mode 100644
index 00000000000..782cee7cdfd
--- /dev/null
+++ b/sysutils/same/distinfo
@@ -0,0 +1,5 @@
+$NetBSD: distinfo,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $
+
+SHA1 (same-1.3.tar.gz) = 0638d10e53c022470e0ef724679f2c476607ebb2
+Size (same-1.3.tar.gz) = 9119 bytes
+SHA1 (patch-ab) = 7df4a5ee747bf542911c5c2f7208494c68ce9561
diff --git a/sysutils/same/files/Makefile b/sysutils/same/files/Makefile
new file mode 100644
index 00000000000..0cdc53fba11
--- /dev/null
+++ b/sysutils/same/files/Makefile
@@ -0,0 +1,15 @@
+same_OBJECTS= same.o crc32.o
+
+all: same
+
+install: all
+ ${BSD_INSTALL_PROGRAM} same ${PREFIX}/bin/same
+ ${BSD_INSTALL_MAN} same.1 ${PREFIX}/man/man1/same.1
+
+.c.o:
+ ${CC} ${CPPFLAGS} ${CFLAGS} -c $< -o $@
+
+same: ${same_OBJECTS}
+ ${CC} ${LDFLAGS} $(same_OBJECTS) ${LIBS} -o $@
+
+.PHONY: all install
diff --git a/sysutils/same/files/same.1 b/sysutils/same/files/same.1
new file mode 100644
index 00000000000..00a358dd9a6
--- /dev/null
+++ b/sysutils/same/files/same.1
@@ -0,0 +1,114 @@
+.TH same 1
+
+.SH NAME
+same \- find identical files and link them to save disk space.
+
+.SH SYNOPSIS
+
+\fBsame\fR [-d | --debug] [-hs \fIn\fR | --hashstart \fIn\fR]
+[-n | --dryrun] [-s] [-t | --timings] [-v | --verbose]
+[-z | --nullfiles]
+
+.SH OPTIONS
+
+.TP
+\fB\-d\fR, \fB\-\-debug\fR
+Switch on debugging messages from the program.
+
+.TP
+\fB\-hs\fR \fIn\fR, \fB--hashstart\fR \fIn\fR
+Set the start value of the hash function.
+
+.TP
+\fB\-n\fR, \fB\-\-dryrun\fR
+Do not modify any file on the disk.
+
+.TP
+\fB\-s\fR
+Create symbolic links instead of hard links.
+
+.TP
+\fB\-t\fR, \fB\-\-timings\fR
+At the end of the program, output the time needed.
+
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Output messages describing what is being done.
+
+.TP
+\fB\-z\fR, \fB\-\-nullfiles\fR
+Even create links for empty files. Normally these files are ignored.
+
+
+.SH INTRODUCTION
+
+This program takes a list of files (e.g. the output of \fBfind . -type f\fR)
+on stdin. Each of the files is compared against each of the
+others. Whenever two files are found that match exactly, the two files
+are linked (soft or hard) together.
+
+
+.SH GOAL
+
+The goal of this program is to conserve disk space when you have
+several different trees of a large project on your disk. By creating
+hardlinks or softlinks between the files that are the same, you can
+save lots of disk space. For example, two different versions of the
+Linux kernel only differ in a small number of files. By running this
+program you only need to store the contents of those files once.
+This is especially useful if you have different versions of complete
+trees lying around.
+
+.SH IMPLEMENTATION
+
+The filesize of every file is used as an indication of whether two
+files can be the same. Whenever the filesizes match, the hashes of
+these two files are compared. Whenever these match, the file
+contents are compared. For every matching pair one of the two
+files is replaced by a hard link to the other file.
+With the \fB-s\fR option a softlink is used.
+
+To allow you to do this incrementally, the "rm" is done on the file
+with the least links. This allows you to "merge" a new tree with
+several trees that have already been processed. The new tree has
+link count 1, while the old tree has a higher link count for those
+files that are likely candidates for linkage.
+
+The current implementation keeps the "first" incarnation of a file,
+and replaces further occurrences of the same file. This is
+significant when using softlinks.
+
+.SH EXAMPLE
+
+.TP
+\fBfind . -type f | same\fR
+
+This links all files together under the current directory that are
+the same.
+
+.SH BUGS
+
+.IP \(bu
+Make sure that you have all the permissions required for
+execution of the commands.
+
+.IP \(bu
+RCS probably allows you to do similar things.
+
+.IP \(bu
+If your editor does not move the original aside before writing a
+new copy, you will change the file in ALL incarnations when
+editing a file. Patch works just fine: it moves the original
+aside before creating a new copy. I'm confident that I could
+teach Emacs to do it this way too. I'm too lazy to figure it out,
+so if you happen to know an easy way how to do this, please Email
+me at R.E.Wolff@BitWizard.nl
+
+.IP \(bu
+There is a 1024 character limit to pathnames when using symlinks.
+
+.SH AUTHOR
+
+This manpage was written by Roland Illig <roland.illig@gmx.de> for the
+pkgsrc distribution. Some sections are taken from the source code of
+`same'.
diff --git a/sysutils/same/patches/patch-ab b/sysutils/same/patches/patch-ab
new file mode 100644
index 00000000000..461ec92bdf2
--- /dev/null
+++ b/sysutils/same/patches/patch-ab
@@ -0,0 +1,133 @@
+$NetBSD: patch-ab,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $
+
+--- same.c.orig 2000-10-16 10:04:15.000000000 +0200
++++ same.c 2004-01-31 20:14:29.000000000 +0100
+@@ -143,6 +143,9 @@
+ dev_t device;
+ ino_t inode;
+ nlink_t nlink;
++ mode_t mode;
++ uid_t owner;
++ gid_t group;
+ long crc;
+ struct inode_entry *next;
+ struct name_entry *names;
+@@ -254,11 +257,19 @@
+ fprintf(stderr, "%-60s: %10.2f s\n", "*** Total execution time ***",
+ (double)total_time/CLK_TCK);
+ fputs("Statistics:\n", stderr);
+- fprintf(stderr, " Merged %lu hard links\n", stat_merge);
+- fprintf(stderr, " Calculated %lu CRCs\n", stat_crc);
+- fprintf(stderr, " Compared %lu files\n", stat_cmp);
+- fprintf(stderr, " Linked %lu names for %lu identical files\n",
+- stat_link_name, stat_link_inode);
++ if (o_dryrun) {
++ fprintf(stderr, " Would have merged %lu hard links\n", stat_merge);
++ fprintf(stderr, " Calculated %lu CRCs\n", stat_crc);
++ fprintf(stderr, " Compared %lu files\n", stat_cmp);
++ fprintf(stderr, " Would have linked %lu names for %lu identical files\n",
++ stat_link_name, stat_link_inode);
++ } else {
++ fprintf(stderr, " Merged %lu hard links\n", stat_merge);
++ fprintf(stderr, " Calculated %lu CRCs\n", stat_crc);
++ fprintf(stderr, " Compared %lu files\n", stat_cmp);
++ fprintf(stderr, " Linked %lu names for %lu identical files\n",
++ stat_link_name, stat_link_inode);
++ }
+ }
+
+ static void progress(int percent)
+@@ -292,10 +303,10 @@
+ {
+ struct name_entry *names;
+
+- printf("%sentry %p size %ld device %lx inode %lx nlink %d crc %08lx\n",
++ printf("%sentry %p size %ld device %lx inode %lx nlink %d mode %06o owner %d group %d crc %08lx\n",
+ indent, entry, (unsigned long)entry->size,
+ (unsigned long)entry->device, (unsigned long)entry->inode,
+- entry->nlink, entry->crc);
++ entry->nlink, entry->mode, entry->owner, entry->group, entry->crc);
+ for (names = entry->names; names; names = names->next)
+ printf("%s %s\n", indent, names->name);
+ }
+@@ -435,6 +446,9 @@
+ entry1 = entry0->next;
+ while (!stop && entry1) {
+ if ((entry0->size == entry1->size) &&
++ (entry0->mode == entry1->mode) &&
++ (entry0->owner == entry1->owner) &&
++ (entry0->group == entry1->group) &&
+ (entry0->size != 0 || o_nullfiles)) {
+ if (entry0->crc == -1)
+ calc_crc(entry0);
+@@ -474,10 +488,16 @@
+ {
+ int res = 0;
+
+- if (o_debug > 0)
+- printf("unlink %s\n", name);
+- if (!o_dryrun && ((res = unlink(name)) == -1))
++ if (o_debug > 0) {
++ if (o_dryrun) {
++ printf("would unlink %s\n", name);
++ } else {
++ printf("unlink %s\n", name);
++ }
++ }
++ if (!o_dryrun && ((res = unlink(name)) == -1)) {
+ fprintf(stderr, "unlink %s: %s\n", name, strerror(errno));
++ }
+ return res;
+ }
+
+@@ -485,8 +505,13 @@
+ {
+ int res = 0;
+
+- if (o_debug > 0)
+- printf("link %s %s\n", master, slave);
++ if (o_debug > 0) {
++ if (o_dryrun) {
++ printf("would link %s %s\n", master, slave);
++ } else {
++ printf("link %s %s\n", master, slave);
++ }
++ }
+ if (!o_dryrun && ((res = link(master, slave)) == -1))
+ fprintf(stderr, "link %s %s: %s\n", master, slave, strerror(errno));
+ return res;
+@@ -496,8 +521,13 @@
+ {
+ int res = 0;
+
+- if (o_debug > 0)
+- printf("symlink %s %s\n", master, slave);
++ if (o_debug > 0) {
++ if (o_dryrun) {
++ printf("would symlink %s %s\n", master, slave);
++ } else {
++ printf("symlink %s %s\n", master, slave);
++ }
++ }
+ if (!o_dryrun && ((res = symlink(master, slave)) == -1))
+ fprintf(stderr, "symlink %s %s: %s\n", master, slave, strerror(errno));
+ return res;
+@@ -618,7 +648,7 @@
+ return NULL;
+ buf[strlen(buf)-1] = '\0';
+ if (lstat(buf, &sb) < 0) {
+- fprintf(stderr, "stat %s: %s", buf, strerror(errno));
++ fprintf(stderr, "stat %s: %s\n", buf, strerror(errno));
+ exit (1);
+ }
+ } while (!S_ISREG(sb.st_mode));
+@@ -630,6 +660,9 @@
+ entry->device = sb.st_dev;
+ entry->inode = sb.st_ino;
+ entry->nlink = sb.st_nlink;
++ entry->mode = sb.st_mode;
++ entry->owner = sb.st_uid;
++ entry->group = sb.st_gid;
+ entry->crc = -1;
+
+ return entry;