From f1fc027f963391e3e0ba17ecccd43dcf96fa0fac Mon Sep 17 00:00:00 2001 From: minskim Date: Sat, 17 Apr 2004 13:21:48 +0000 Subject: Import `same' from pkgsrc-wip. Packaged by Roland Illig. The `same' utility looks for identical files and links them together using either hard links or symbolic links, saving disk space. Changes to the original version: * the file ownership and permissions are checked. (The original version checked only the file size) * the messages in the "dry-run" mode were suggesting that anything on the disk would be modified. They have been made clear. --- sysutils/same/DESCR | 8 +++ sysutils/same/Makefile | 20 +++++++ sysutils/same/PLIST | 3 + sysutils/same/distinfo | 5 ++ sysutils/same/files/Makefile | 15 +++++ sysutils/same/files/same.1 | 114 +++++++++++++++++++++++++++++++++++ sysutils/same/patches/patch-ab | 133 +++++++++++++++++++++++++++++++++++++++++ 7 files changed, 298 insertions(+) create mode 100644 sysutils/same/DESCR create mode 100644 sysutils/same/Makefile create mode 100644 sysutils/same/PLIST create mode 100644 sysutils/same/distinfo create mode 100644 sysutils/same/files/Makefile create mode 100644 sysutils/same/files/same.1 create mode 100644 sysutils/same/patches/patch-ab (limited to 'sysutils') diff --git a/sysutils/same/DESCR b/sysutils/same/DESCR new file mode 100644 index 00000000000..4c2c7924944 --- /dev/null +++ b/sysutils/same/DESCR @@ -0,0 +1,8 @@ +The `same' utility looks for identical files and links them together +using either hard links or symbolic links, saving disk space. + +Changes to the original version: +* the file ownership and permissions are checked. (The original + version checked only the file size) +* the messages in the "dry-run" mode were suggesting that anything on + the disk would be modified. They have been made clear. 
diff --git a/sysutils/same/Makefile b/sysutils/same/Makefile new file mode 100644 index 00000000000..68a237dea63 --- /dev/null +++ b/sysutils/same/Makefile @@ -0,0 +1,20 @@ +# $NetBSD: Makefile,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $ + +DISTNAME= same-1.3 +CATEGORIES= sysutils +MASTER_SITES= ftp://ftp.bitwizard.nl/same/ + +MAINTAINER= roland.illig@gmx.de +HOMEPAGE= http://www.bitwizard.nl/ +COMMENT= Find identical files and link(2) them to save disk space + +PKG_INSTALLATION_TYPES= overwrite pkgviews + +MAKEFILE= ${FILESDIR}/Makefile + +INSTALLATION_DIRS= bin man/man1 + +post-patch: + @${CP} ${FILESDIR}/same.1 ${WRKSRC} + +.include "../../mk/bsd.pkg.mk" diff --git a/sysutils/same/PLIST b/sysutils/same/PLIST new file mode 100644 index 00000000000..9bc7478c798 --- /dev/null +++ b/sysutils/same/PLIST @@ -0,0 +1,3 @@ +@comment $NetBSD: PLIST,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $ +bin/same +man/man1/same.1 diff --git a/sysutils/same/distinfo b/sysutils/same/distinfo new file mode 100644 index 00000000000..782cee7cdfd --- /dev/null +++ b/sysutils/same/distinfo @@ -0,0 +1,5 @@ +$NetBSD: distinfo,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $ + +SHA1 (same-1.3.tar.gz) = 0638d10e53c022470e0ef724679f2c476607ebb2 +Size (same-1.3.tar.gz) = 9119 bytes +SHA1 (patch-ab) = 7df4a5ee747bf542911c5c2f7208494c68ce9561 diff --git a/sysutils/same/files/Makefile b/sysutils/same/files/Makefile new file mode 100644 index 00000000000..0cdc53fba11 --- /dev/null +++ b/sysutils/same/files/Makefile @@ -0,0 +1,15 @@ +same_OBJECTS= same.o crc32.o + +all: same + +install: all + ${BSD_INSTALL_PROGRAM} same ${PREFIX}/bin/same + ${BSD_INSTALL_MAN} same.1 ${PREFIX}/man/man1/same.1 + +.c.o: + ${CC} ${CPPFLAGS} ${CFLAGS} -c $< -o $@ + +same: ${same_OBJECTS} + ${CC} ${LDFLAGS} $(same_OBJECTS) ${LIBS} -o $@ + +.PHONY: all install diff --git a/sysutils/same/files/same.1 b/sysutils/same/files/same.1 new file mode 100644 index 00000000000..00a358dd9a6 --- /dev/null +++ b/sysutils/same/files/same.1 @@ 
-0,0 +1,114 @@ +.TH same 1 + +.SH NAME +same \- find identical files and link them to save disk space. + +.SH SYNOPSIS + +\fBsame\fR [-d | --debug] [-hs \fIn\fR | --hashstart \fIn\fR] +[-n | --dryrun] [-s] [-t | --timings] [-v | --verbose] +[-z | --nullfiles] + +.SH OPTIONS + +.TP +\fB\-d\fR, \fB\-\-debug\fR +Switch on debugging messages from the program. + +.TP +\fB\-hs\fR \fIn\fR, \fB--hashstart\fR \fIn\fR +Set the start value of the hash function. + +.TP +\fB\-n\fR, \fB\-\-dryrun\fR +Do not modify any file on the disk. + +.TP +\fB\-s\fR +Create symbolic links instead of hard links. + +.TP +\fB\-t\fR, \fB\-\-timings\fR +At the end of the program, output the time needed. + +.TP +\fB\-v\fR, \fB\-\-verbose\fR +Output some messages about what is done. + +.TP +\fB\-z\fR, \fB\-\-nullfiles\fR +Even create links for empty files. Normally these files are ignored. + + +.SH INTRODUCTION + +This program takes a list of files (e.g. the output of \fBfind . -type f\fR) +on stdin. Each of the files is compared against each of the +others. Whenever two files are found that match exactly, the two files +are linked (soft or hard) together. + + +.SH GOAL + +The goal of this program is to conserve disk space when you have +several different trees of a large project on your disk. By creating +hardlinks or softlinks between the files that are the same, you can +save lots of disk space. For example, two different versions of the +Linux kernel only differ in a small number of files. By running this +program you only need to store the contents of those files once. +This is especially useful if you have different versions of complete +trees lying around. + +.SH IMPLEMENTATION + +The filesize of every file is used as an indication of whether two +files can be the same. Whenever the filesizes match, the hashes of +these two files are compared. Whenever these match, the file +contents are compared. For every matching pair one of the two +files is replaced by a hard link to the other file. 
+With the \fB-s\fR option a softlink is used. + +To allow you to do this incrementally, the "rm" is done on the file +with the least links. This allows you to "merge" a new tree with +several trees that have already been processed. The new tree has +link count 1, while the old tree has a higher link count for those +files that are likely candidates for linkage. + +The current implementation keeps the "first" incarnation of a file, +and replaces further occurrences of the same file. This is +significant when using softlinks. + +.SH EXAMPLE + +.TP +\fBfind . -type f | same\fR + +This links all files together under the current directory that are +the same. + +.SH BUGS + +.IP \(bu +Make sure that you have all the permissions required for +execution of the commands. + +.IP \(bu +RCS probably allows you to do similar things. + +.IP \(bu +If your editor does not move the original aside before writing a +new copy, you will change the file in ALL incarnations when +editing a file. Patch works just fine: it moves the original +aside before creating a new copy. I'm confident that I could +teach Emacs to do it this way too. I'm too lazy to figure it out, +so if you happen to know an easy way how to do this, please Email +me at R.E.Wolff@BitWizard.nl + +.IP \(bu +There is a 1024 character limit to pathnames when using symlinks. + +.SH AUTHOR + +This manpage was written by Roland Illig for the +pkgsrc distribution. Some sections are taken from the source code of +`same'. 
diff --git a/sysutils/same/patches/patch-ab b/sysutils/same/patches/patch-ab new file mode 100644 index 00000000000..461ec92bdf2 --- /dev/null +++ b/sysutils/same/patches/patch-ab @@ -0,0 +1,133 @@ +$NetBSD: patch-ab,v 1.1.1.1 2004/04/17 13:21:48 minskim Exp $ + +--- same.c.orig 2000-10-16 10:04:15.000000000 +0200 ++++ same.c 2004-01-31 20:14:29.000000000 +0100 +@@ -143,6 +143,9 @@ + dev_t device; + ino_t inode; + nlink_t nlink; ++ mode_t mode; ++ uid_t owner; ++ gid_t group; + long crc; + struct inode_entry *next; + struct name_entry *names; +@@ -254,11 +257,19 @@ + fprintf(stderr, "%-60s: %10.2f s\n", "*** Total execution time ***", + (double)total_time/CLK_TCK); + fputs("Statistics:\n", stderr); +- fprintf(stderr, " Merged %lu hard links\n", stat_merge); +- fprintf(stderr, " Calculated %lu CRCs\n", stat_crc); +- fprintf(stderr, " Compared %lu files\n", stat_cmp); +- fprintf(stderr, " Linked %lu names for %lu identical files\n", +- stat_link_name, stat_link_inode); ++ if (o_dryrun) { ++ fprintf(stderr, " Would have merged %lu hard links\n", stat_merge); ++ fprintf(stderr, " Calculated %lu CRCs\n", stat_crc); ++ fprintf(stderr, " Compared %lu files\n", stat_cmp); ++ fprintf(stderr, " Would have linked %lu names for %lu identical files\n", ++ stat_link_name, stat_link_inode); ++ } else { ++ fprintf(stderr, " Merged %lu hard links\n", stat_merge); ++ fprintf(stderr, " Calculated %lu CRCs\n", stat_crc); ++ fprintf(stderr, " Compared %lu files\n", stat_cmp); ++ fprintf(stderr, " Linked %lu names for %lu identical files\n", ++ stat_link_name, stat_link_inode); ++ } + } + + static void progress(int percent) +@@ -292,10 +303,10 @@ + { + struct name_entry *names; + +- printf("%sentry %p size %ld device %lx inode %lx nlink %d crc %08lx\n", ++ printf("%sentry %p size %ld device %lx inode %lx nlink %d mode %06o owner %d group %d crc %08lx\n", + indent, entry, (unsigned long)entry->size, + (unsigned long)entry->device, (unsigned long)entry->inode, +- entry->nlink, 
entry->crc); ++ entry->nlink, entry->mode, entry->owner, entry->group, entry->crc); + for (names = entry->names; names; names = names->next) + printf("%s %s\n", indent, names->name); + } +@@ -435,6 +446,9 @@ + entry1 = entry0->next; + while (!stop && entry1) { + if ((entry0->size == entry1->size) && ++ (entry0->mode == entry1->mode) && ++ (entry0->owner == entry1->owner) && ++ (entry0->group == entry1->group) && + (entry0->size != 0 || o_nullfiles)) { + if (entry0->crc == -1) + calc_crc(entry0); +@@ -474,10 +488,16 @@ + { + int res = 0; + +- if (o_debug > 0) +- printf("unlink %s\n", name); +- if (!o_dryrun && ((res = unlink(name)) == -1)) ++ if (o_debug > 0) { ++ if (o_dryrun) { ++ printf("would unlink %s\n", name); ++ } else { ++ printf("unlink %s\n", name); ++ } ++ } ++ if (!o_dryrun && ((res = unlink(name)) == -1)) { + fprintf(stderr, "unlink %s: %s\n", name, strerror(errno)); ++ } + return res; + } + +@@ -485,8 +505,13 @@ + { + int res = 0; + +- if (o_debug > 0) +- printf("link %s %s\n", master, slave); ++ if (o_debug > 0) { ++ if (o_dryrun) { ++ printf("would link %s %s\n", master, slave); ++ } else { ++ printf("link %s %s\n", master, slave); ++ } ++ } + if (!o_dryrun && ((res = link(master, slave)) == -1)) + fprintf(stderr, "link %s %s: %s\n", master, slave, strerror(errno)); + return res; +@@ -496,8 +521,13 @@ + { + int res = 0; + +- if (o_debug > 0) +- printf("symlink %s %s\n", master, slave); ++ if (o_debug > 0) { ++ if (o_dryrun) { ++ printf("would symlink %s %s\n", master, slave); ++ } else { ++ printf("symlink %s %s\n", master, slave); ++ } ++ } + if (!o_dryrun && ((res = symlink(master, slave)) == -1)) + fprintf(stderr, "symlink %s %s: %s\n", master, slave, strerror(errno)); + return res; +@@ -618,7 +648,7 @@ + return NULL; + buf[strlen(buf)-1] = '\0'; + if (lstat(buf, &sb) < 0) { +- fprintf(stderr, "stat %s: %s", buf, strerror(errno)); ++ fprintf(stderr, "stat %s: %s\n", buf, strerror(errno)); + exit (1); + } + } while (!S_ISREG(sb.st_mode)); +@@ 
-630,6 +660,9 @@ + entry->device = sb.st_dev; + entry->inode = sb.st_ino; + entry->nlink = sb.st_nlink; ++ entry->mode = sb.st_mode; ++ entry->owner = sb.st_uid; ++ entry->group = sb.st_gid; + entry->crc = -1; + + return entry; -- cgit v1.2.3