summaryrefslogtreecommitdiff
path: root/3rd-party/dirsplit
diff options
context:
space:
mode:
Diffstat (limited to '3rd-party/dirsplit')
-rw-r--r--3rd-party/dirsplit/CMakeLists.txt3
-rw-r--r--3rd-party/dirsplit/ChangeLog27
-rw-r--r--3rd-party/dirsplit/README7
-rwxr-xr-x3rd-party/dirsplit/dirsplit611
-rw-r--r--3rd-party/dirsplit/dirsplit.127
5 files changed, 675 insertions, 0 deletions
diff --git a/3rd-party/dirsplit/CMakeLists.txt b/3rd-party/dirsplit/CMakeLists.txt
new file mode 100644
index 0000000..497774c
--- /dev/null
+++ b/3rd-party/dirsplit/CMakeLists.txt
@@ -0,0 +1,3 @@
+PROJECT (DIRSPLIT C)
+INSTALL(PROGRAMS dirsplit DESTINATION bin)
+INSTALL(FILES dirsplit.1 DESTINATION share/man/man1)
diff --git a/3rd-party/dirsplit/ChangeLog b/3rd-party/dirsplit/ChangeLog
new file mode 100644
index 0000000..b5cd780
--- /dev/null
+++ b/3rd-party/dirsplit/ChangeLog
@@ -0,0 +1,27 @@
+*0.3.3: 2006/03
+
+* code refactoring, much more readable now
+* dropped the "du" exploration mode
+* dropped the read-from-file mode and the input "correction" kludge
+ * Use the new -T option to specify arbitrary input, and avoid dupes.
+ * Use the new option -F to follow symlinks
+* option name fixes
+* new option simple/stupid mode
+ * no space-efficiency optimisation, instead trying to store in alphabetic
+ order
+
+0.3.2:
+
+* cosmetic, correct usage and long help
+
+0.3.1:
+
+* proper fix for the = filenames
+
+0.3:
+
+* rewrite of some data input code, now using internal directory scanning
+(exploration) to get data, not trusting du -a crap
+* estimative calculation for filesystem overhead for directory entries
+* workaround for = in filenames
+* much, much more
diff --git a/3rd-party/dirsplit/README b/3rd-party/dirsplit/README
new file mode 100644
index 0000000..3867a12
--- /dev/null
+++ b/3rd-party/dirsplit/README
@@ -0,0 +1,7 @@
+Package: dirsplit
+License: GPLv2 (until I change my mind)
+Purpose: put files from a directory structure in subdirectories of specified
+size or create catalogues for mkisofs so that generated volumes waste as little
+space as possible
+Algorithm: randomising FirstFit or BestFit
+Usage: Self-explanatory
diff --git a/3rd-party/dirsplit/dirsplit b/3rd-party/dirsplit/dirsplit
new file mode 100755
index 0000000..1348976
--- /dev/null
+++ b/3rd-party/dirsplit/dirsplit
@@ -0,0 +1,611 @@
+#!/usr/bin/perl
+# -*- Mode: Perl -*-
+# dirsplit ---
+# Author : Eduard Bloch ( blade@debian.org )
+# Last Modified On : Sun, 06 Feb 2005 14:59:51 +0100
+# Status : Working, but use with caution!
+# License: GPLv2
+
+my $version="0.3.3";
+
+require v5.8.1;
+use strict;
+use List::Util 'shuffle';
+use Getopt::Long qw(:config no_ignore_case bundling);
+use File::Basename;
+use File::Path;
+use Cwd 'abs_path';
+
+# exit status reported when the script finishes
+my $ret=0;
+# medium size, may carry a unit suffix (converted with fixnr() later on)
+my $max="4488M";
+# first part of the generated catalog/directory names
+my $prefix="vol_";
+# number of randomised packing attempts
+my $acc=20;
+# directory exploration mode, see the long help
+my $emode=1;
+# block size that file sizes are rounded up to
+my $bsize=2048;
+# estimated directory entry overhead per character of the file name
+my $ofac =50;
+my $opt_help;
+my $opt_longhelp;
+my $opt_sim;
+my $opt_dir;
+my $opt_flat;
+my $opt_move;
+my $opt_ver;
+my $opt_sln;
+my $opt_ln;
+my $opt_filter;
+my $opt_simple;
+my $opt_follow;
+my $get_ver;
+my $opt_listfile;
+
+
+# Getopt::Long specification => variable receiving the value
+my %options = (
+    "h|help" => \$opt_help,
+    "d|dirhier" => \$opt_dir,
+    "flat" => \$opt_flat,
+    "f|filter=s" => \$opt_filter,
+    "F|follow" => \$opt_follow,
+    "e|expmode=i" => \$emode,
+    "o|overhead=i" => \$ofac,
+    "b|blksize=i" => \$bsize,
+    "n|no-act" => \$opt_sim,
+    "m|move" => \$opt_move,
+    "l|symlink" => \$opt_sln,
+    "L|hardlink" => \$opt_ln,
+    "v|verbose" => \$opt_ver,
+    "s|size=s" => \$max,
+    "S|simple" => \$opt_simple,
+    "T|input=s" => \$opt_listfile,
+    "p|prefix=s" => \$prefix,
+    "a|accuracy=i" => \$acc,
+    "H|longhelp" => \$opt_longhelp,
+    "version" => \$get_ver
+);
+
+# unparsable options are an error ...
+show_help(1) unless GetOptions(%options);
+# ... but an explicitly requested help screen is not, so report success
+show_help(0) if $opt_help;
+show_longhelp() if $opt_longhelp;
+if($get_ver) {
+    # terminate the line so the shell prompt does not end up behind the version
+    print "$version\n";
+    exit 0;
+}
+
+# ignore the old dirhier setting since it is default now and disable the flag when opt_flat is specified
+$opt_dir = !$opt_flat;
+
+# a dry run is only useful when it shows what would be done
+$opt_ver = 1 if $opt_sim;
+# both link modes reuse the code path of the move mode
+$opt_move=1 if ($opt_sln || $opt_ln);
+
+# big list @sizes containing the "items" (object sizes)
+# %names hash mapping "items" (size as key) to arrays with filenames/subarrays for coalesced files
+my @sizes;
+my %names;
+
+# @result contains the calculated output. In simple mode, an
+# array (bins) of atoms (files or filelists). Otherwise, sizes
+# instead of atoms, to be resolved with %names.
+my @result;
+
+# absolute path of the scanned directory; stays undefined when the input
+# comes from a list file (-T)
+my $inputdir;
+
+$max=fixnr($max);
+# about 400kB for iso headers
+$max-=420000;
+
+# init default value
+my $globwaste=0;
+
+
+# -d follows symbolic links itself, so a link pointing to a directory is
+# accepted here without resolving it by hand. (Testing the raw readlink()
+# result would resolve relative link targets against the current directory
+# instead of the location of the link.)
+if(defined($ARGV[0]) && -d $ARGV[0]) {
+    syswrite(STDOUT,"Building file list, please wait...\n");
+    # save the absolute path before doing anything
+    $inputdir=Cwd::abs_path($ARGV[0]);
+    &explore($inputdir);
+}
+elsif($opt_listfile) {
+    if($opt_listfile eq "-") {
+        &parseListe(\*STDIN);
+    }
+    else {
+        open(my $in, "<", $opt_listfile) || die "Cannot open list file $opt_listfile\n";
+        &parseListe($in);
+    }
+}
+else {
+    die "Error: please specify a directory\n";
+}
+
+# check for pointless requests
+my $testsize=0;
+for(@sizes) {
+    die "Too large object(s) ($_) for the given max size: @{$names{$_}} (maybe coalesced in arrays, check manually)\n" if($_>$max);
+
+    $testsize+=$_;
+}
+
+$acc=1 if ($testsize <= $max); # just generate a list, more trials are pointless
+print "\nSumm: $testsize\n" if($opt_ver);
+die "Nothing to do!\n" if($testsize<4096); # looks like just an empty dir
+
+# Simple mode has filled @result while the items were registered; every other
+# mode packs the sizes $acc times in a different random order and keeps the
+# attempt that wastes the least space.
+unless($opt_simple) {
+    syswrite(STDOUT, "Calculating, please wait...\n");
+    # moment after which the "this takes long" hint is printed (only once)
+    my $hintAfter=time+10;
+    # upper bound, any real attempt wastes less than this
+    $globwaste=$max*@sizes;
+    for(1..$acc) {
+        syswrite(STDOUT,".");
+        my @attempt;
+        #my $wasted = bp_bestfit($max, \@in, \@attempt);
+        my $wasted = bp_firstfit($max, \@sizes, \@attempt);
+        #print "D: waste - $wasted\n";
+        if($wasted < $globwaste) {
+            @result=@attempt;
+            $globwaste=$wasted;
+        }
+        if(defined($hintAfter) && time > $hintAfter) {
+            syswrite(STDOUT,"\nSpent already over 10s (for $_ iterations)\nHint: reduce accuracy to make it faster!\n");
+            undef $hintAfter;
+        }
+        # next attempt starts from a different order
+        @sizes=shuffle(@sizes);
+    }
+
+}
+
+my $volumeCount=scalar @result;
+print "\nCalculated, using $volumeCount volumes.\n";
+print "Wasted: $globwaste Byte (estimated, check mkisofs -print-size ...)\n";
+
+# and the real work: one mkisofs catalog per volume, or (move/link modes) one
+# target directory per volume
+my $i=0;
+# no directory is scanned in list-file (-T) mode, so there is no common
+# prefix that could be cut off the listed paths
+my $inDirLen=defined($inputdir) ? length($inputdir) : 0;
+# the target location is the same for all volumes
+my $dirPrefix=dirname($prefix);
+my $prefixBase=basename($prefix);
+my $dirPrefixAbs=Cwd::abs_path($dirPrefix);
+
+for my $volume (@result) {
+    $i++;
+    my $o;
+    if(! ($opt_move || $opt_sim)) {
+        my $listfile="$prefix$i.list";
+        open($o, ">", $listfile) || die "Cannot write catalog $listfile: $!\n";
+    }
+
+    for my $entry (@{$volume}) {
+        # For simple mode, the files/atoms are already resolved, otherwise take
+        # the next with appropriate size.
+        my $item= $opt_simple ? $entry : shift(@{$names{$entry}});
+
+        # make reference point to an array with our files, create a list if needed
+        my $stuffRef=(ref($item) eq "ARRAY") ? $item : [$item];
+
+        for my $file (@$stuffRef) {
+            my $relFile;
+            if(defined($inputdir)) {
+                # cut off the scanned directory and the following slash
+                $relFile=substr($file,$inDirLen+1);
+            }
+            else {
+                # paths from a list file: only drop a leading "/" or "./",
+                # never a character of the name itself
+                ($relFile=$file)=~s{^(?:\.?/+)+}{};
+            }
+            my $base=basename($relFile);
+            if($opt_move) {
+                my $targetsubdir = $dirPrefixAbs."/$prefixBase$i";
+                $targetsubdir .= "/".dirname($relFile) if($opt_dir);
+                print "$file -> $targetsubdir/$base\n" if($opt_ver);
+                next if($opt_sim);
+
+                # mkpath() croaks on failure and returns only the directories
+                # it had to create, so its return value says nothing about
+                # success - catch the exception instead
+                eval { mkpath($targetsubdir); 1 } || die "Problems creating $targetsubdir\n";
+                # last check
+                die "Could not create $targetsubdir?\n" if(!(-d $targetsubdir && -w $targetsubdir));
+
+                my $target="$targetsubdir/$base";
+                my $done;
+                if($opt_sln) {
+                    $done=symlink($file, $target);
+                }
+                elsif($opt_ln) {
+                    if(-d $file && !-l $file) {
+                        # directories cannot be hard-linked, recreate them
+                        $done=(-d $target) || mkdir($target);
+                    }
+                    else {
+                        $done=link($file, $target);
+                    }
+                }
+                else {
+                    $done=rename($file, $target);
+                }
+                if(!$done) {
+                    # keep going with the other files but report the failure
+                    warn "Could not create $target: $!\n";
+                    $ret=1;
+                }
+            }
+            else {
+                # escape = in mkisofs catalogs, they are used as separator
+                my $isoname = ($opt_dir?$relFile : $base);
+                $isoname=~s/=/\\=/g;
+                my $sourcefile=$file;
+                $sourcefile=~s/=/\\=/g;
+                print "$i: /$isoname=$sourcefile\n" if $opt_ver;
+                print $o "/$isoname=$sourcefile\n" if(!$opt_sim);
+            }
+        }
+    }
+    # buffered write errors show up when the catalog is closed
+    if($o && !close($o)) {
+        warn "Could not finish catalog $prefix$i.list: $!\n";
+        $ret=1;
+    }
+}
+
+exit $ret;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Recursive directory scan, registers everything it finds with insitem().
+# parameter: directory
+# Subdirectories are always descended into. An empty directory is registered
+# as an item of its own. The files of a directory are handled depending on
+# $emode:
+# mode 1: every file becomes an item of its own
+# mode 2: all files of the directory become one coalesced item
+# mode 3: like 2, but files stay separate if their sum exceeds $max
+# mode 4: like 2, but the files are split into several coalesced items, none
+#         of them larger than $max
+# Dies if the directory cannot be opened or (modes 2-4) if a single file is
+# larger than $max.
+sub explore {
+    my ($dir) = @_;
+    my @dirs;
+    my @files;
+
+    # lexical handle, so the recursion cannot interfere with an open scan
+    opendir(my $dh, $dir) || die "Could not open $dir\n";
+    my @stuff=readdir($dh);
+    closedir($dh);
+
+    if($opt_simple) {
+        @stuff=sort { lc($a) cmp lc($b) } @stuff;
+    }
+
+    foreach my $entry (@stuff) {
+        next if ($entry eq "." || $entry eq "..");
+
+        my $f=($dir ne ".") ? "$dir/$entry" : $entry;
+
+        if ($opt_filter) {
+            # NOTE(review): the -f argument is evaluated as Perl code against
+            # $f, so it must only ever come from a trusted command line
+            next unless (eval("\$f=~$opt_filter;"));
+        }
+
+        # symlinks count as plain files unless -F was given
+        if(-d $f && !(-l $f && ! $opt_follow)) {
+            push(@dirs, $f);
+        }
+        else {
+            push(@files, $f);
+        }
+    }
+
+    if( (@dirs + @files) == 0 ) {
+        # this one is empty, register for cosmetics reason
+        insitem(getsize($dir), $dir);
+        return;
+    }
+
+    # recurse on directories
+    explore($_) for(@dirs);
+
+    # and now process files
+    if($emode==1) {
+        insitem(getsize($_), $_) for(@files);
+        return;
+    }
+
+    # handle coalesced objects - first some sanity checks and splitting if
+    # required
+    my $filesum=0;
+    for(@files) {
+        my $tmp=getsize($_);
+        if($tmp>$max) {
+            # already too large, stop right here
+            die "Too large file ($_) for the given max size $max, aborting...\n";
+        }
+        $filesum += $tmp;
+    }
+
+    # handle coal. objects becoming too large
+    if($filesum>$max) {
+        if($emode==3) {
+            # don't coalesce in this mode, do like mode 1 above, leave them alone
+            insitem(getsize($_), $_) for(@files);
+            return;
+        }
+        if($emode==4) {
+            # split the file set into coal. objects that fit on a medium
+            my $partsum=0;
+            my @tmpvol;
+            for my $file (sort(@files)) {
+                my $tmp=getsize($file);
+                if($partsum+$tmp>$max) {
+                    # The current group is full. Register a copy of the list:
+                    # a reference to @tmpvol itself would be emptied by the
+                    # reset below and refilled with the next group.
+                    insitem($partsum, [@tmpvol]);
+                    @tmpvol=();
+                    $partsum=0;
+                }
+                # no single file exceeds $max (checked above), so it always
+                # fits into a fresh group
+                push(@tmpvol, $file);
+                $partsum+=$tmp;
+            }
+            # the files collected after the last split form a group as well
+            insitem($partsum, [@tmpvol]) if(@tmpvol);
+            return;
+        }
+    }
+
+    # ok, building a coalesced object for simple cases
+    if($filesum) {
+        insitem($filesum, \@files);
+    }
+}
+
+# Simple mode state: index of the bin that is filled at the moment and the
+# fill level of every bin. The script leaves through "exit" further up before
+# control reaches these statements, so the "=0" is not executed at run time;
+# the still undefined position is used as index 0 by the array accesses.
+my $simplePos=0;
+my @simpleBinSizes;
+
+# Registers one item for the distribution.
+# args: size, object (filename or list reference)
+sub insitem {
+    my $size = shift;
+    my $object = shift;
+
+    # always feed the pool used by the packing calculation
+    push(@{$names{$size}}, $object);
+    push(@sizes, $size);
+
+    if($opt_simple) {
+        # simple mode distributes right here: keep filling the current bin
+        # and open the next one as soon as the new object does not fit anymore
+        my $filled = $simpleBinSizes[$simplePos];
+        if($filled+$size > $max) {
+            # what is left in the old bin is lost
+            $globwaste += ( $max-$filled );
+            $simplePos++;
+        }
+        $simpleBinSizes[$simplePos] += $size;
+        push( @{$result[$simplePos]}, $object);
+    }
+
+}
+
+# Estimates the space an object needs on the medium: its stat() size rounded
+# up to a multiple of $bsize, plus $ofac per character of its base name, plus
+# a constant of 201.
+sub getsize {
+    my $path = shift;
+    my $bytes = (stat($path))[7];
+    # fill up the last, partially used block
+    my $tail = $bytes % $bsize;
+    $bytes += $bsize - $tail if($tail);
+    my $entryCost = $ofac*length(basename($path));
+    return 1+int(200 + $entryCost + $bytes);
+}
+
+# Reads "SIZE PATH" lines from a file handle and registers each of them with
+# insitem(); SIZE goes through fixnr(), lines of a different shape are skipped.
+# args: reference to the file handle
+sub parseListe {
+    my ($handleRef) = @_;
+    my $fh=${$handleRef};
+    while(<$fh>) {
+        next unless(/^(\w+)\s+(.+)/);
+        my ($rawSize, $path) = ($1, $2);
+        insitem(fixnr($rawSize), $path);
+    }
+}
+
+# Converts a size specification into a plain number.
+# args:
+# Number, optionally followed by a unit letter
+# optional: default multiplier, used when the number has no unit letter
+# A value without unit letter and without default multiplier is returned
+# unchanged; an unknown unit letter is fatal.
+sub fixnr {
+    my ($value, $defaultUnit) = @_;
+    my ($nr, $fac);
+
+    if($value=~/(\d+)(\D)/) {
+        ($nr, $fac) = ($1, $2);
+    }
+    elsif(defined($defaultUnit)) {
+        ($nr, $fac) = ($value, $defaultUnit);
+    }
+    else {
+        return $value;
+    }
+
+    # plain bytes, nothing to multiply
+    return $nr if($fac eq "b");
+
+    # lower case: decimal units, upper case: binary units
+    my %factorFor = (
+        "g" => 1000000000,
+        "G" => 1073741824,
+        "m" => 1000000,
+        "M" => 1048576,
+        "k" => 1000,
+        "K" => 1024,
+    );
+    return $nr*$factorFor{$fac} if(exists($factorFor{$fac}));
+
+    die "$fac is not a valid multiplier!";
+}
+
+
+# Prints the short option summary and terminates the program.
+# args: exit status (an omitted status ends the program with 0)
+sub show_help {
+    my ($status) = @_;
+    my $usage = <<EOM;
+dirsplit [options] [advanced options] < directory >
+
+ -H|--longhelp Show the long help message with more advanced options
+ -n|--no-act Only print the commands, no action (implies -v)
+ -s|--size NUMBER - Size of the medium (default: $max)
+ -e|--expmode NUMBER - directory exploration mode (recommended, see long help)
+ -m|--move Move files to target dirs (default: create mkisofs catalogs)
+ -p|--prefix STRING - first part of catalog/directory name (default: vol_)
+ -h|--help Show this option summary
+ -v|--verbose More verbosity
+
+The complete help can be displayed with the --longhelp (-H) option.
+The default mode is creating file catalogs useable with:
+ mkisofs -D -r --joliet-long -graft-points -path-list CATALOG
+
+Example:
+dirsplit -m -s 700M -e2 random_data_to_backup/
+EOM
+    print $usage;
+    exit $status;
+}
+
+# Prints the complete option description with examples and terminates the
+# program with exit status 0. The current values of $max and $bsize are
+# shown in the text.
+sub show_longhelp {
+    my $msglong="
+dirsplit [options] [advanced options] < directory >
+ -n|--no-act Only print the commands, no action (implies -v)
+ -s|--size NUMBER - Size of the medium (default: $max)
+ -m|--move Move files to target dirs (default: create mkisofs catalogs)
+ -l|--symlink similar to -m but just creates symlinks in the target dirs
+ -L|--hardlink like -l but creates hardlinks
+ -p|--prefix STRING - first part of catalog/directory name (default: vol_)
+ -f|--filter EXPR - Filter expression, see examples below and perlre manpage
+ --flat Flat dir mode, don't recreate subdirectory structure (not recommended)
+ -e|--expmode NUMBER, special exploration modes, used with directory argument
+
+ 1: (default) native exploration of the specified directory, but file sizes
+ are rounded up to 2048 blocks plus estimated overhead for
+ filenames (see -o option)
+ 2: like 1, but all files in directory are put together (as \"atom\") onto the
+ same medium. This does not apply to subdirectories, however.
+ 3: like 2, but don't coalesc files when the size of the \"atom\" becomes too
+ large for the medium size (currently $max)
+ 4: like 2, but the max. size of the atoms is limited to $max (storing the
+ rest on another medium)
+
+ -F|--follow Follow symlinks. Use with care!
+ -b|--blksize NUMBER, block size of the target filesystem (currently $bsize).
+ -o|--overhead NUMBER, overhead caused by directory entries (as factor for the
+ filename length, default: 50, empiricaly found for Joliet+RR
+ with not-so-deep directory structure). Works in exploration
+ mode.
+ -a|--accuracy NUMBER (1=faster, large number=better efficiency, default: 20)
+ -S|--simple Simple/stupid/alphabetic mode
+ -T|--input FILENAME (or - for STDIN): List with sizes and paths, try:
+ find dir -type f -printf \"%s %p\\n\"
+ to get an example. Avoid duplicates! Unit suffixes are allowed.
+ -h|--help Show this option summary
+ -v|--verbose More verbosity
+
+File sizes are expected to be in bytes, append modifier letters to multiply
+with a factor, eg 200M (b,k,K,m,M,g,G for Bytes, Kb, KiB, Mb, MiB, Gb, GiB).
+The default output mode is creating file catalogs useable with
+ mkisofs -D -r --joliet-long -graft-points -path-list CATALOG
+
+Examples:
+dirsplit -m -s 120M -e4 largedirwithdata/ -p /zipmedia/backup_ #move stuff into splitted backup dirs
+dirsplit -s 700M -e2 music/ # make mkisofs catalogs to burn all music to 700M CDRs, keep single files in each dir together
+dirsplit -s 700M -e2 -f '/other\\/Soundtracks/' music/ # like above, only take files from other/Soundtracks
+dirsplit -s 700M -e2 -f '!/Thumbs.db|Desktop.ini|\\.m3u\$/i' # like above, ignore some junk files and playlists, both letter cases
+
+Bugs: overhead trough blocksize alignment and directory entry storage varies,
+heavily depends on the target filesystem and configuration (see -b and -o).
+
+You should compare the required size of the created catalogs, eg.:
+for x in *list ; do mkisofs -quiet -D -r --joliet-long -graft-points \\
+ -path-list \$x -print-size; done
+(output in blocks of 2048 bytes) with the expected size (-s) and media data
+(cdrecord -v -toc ...).
+";
+    # Two details of the text above matter: the accuracy default matches the
+    # initial value of \$acc, and the backslash of the find example is doubled
+    # so that a literal \\n is printed instead of a line break.
+    print $msglong;
+    exit 0;
+}
+
+# Best fit bin packing: every object goes into the bin that is left with the
+# least free space afterwards; a new bin is opened if it fits nowhere.
+# Parms: bin size (int), input array (arr reference), output array (arr reference)
+# The output array is overwritten with one array reference per bin, holding
+# the objects in the order they were added. The input array is not modified.
+# Returns: wasted space (int), the free space of all bins except the last one
+sub bp_bestfit {
+    my ($max, $in, $target) = @_;
+    my @out; # the bins
+    my @bel; # fill level of each bin
+
+    for my $obj (@{$in}) {
+        # index one behind the last bin means "open a new bin"
+        my $bestplace=@out;
+        my $bestwert=$max;
+        for my $bin (0..$#out) {
+            my $rest=$max-$bel[$bin]-$obj;
+            # an object filling the bin completely (rest 0) is the best fit
+            # possible, so it has to be accepted too
+            if($rest>=0 && $rest < $bestwert) {
+                $bestplace=$bin;
+                $bestwert=$rest;
+            }
+        }
+        if($bestplace>$#out) {
+            push(@bel, $obj);
+            push(@out, [$obj]);
+        }
+        else {
+            $bel[$bestplace]+=$obj;
+            push( @{$out[$bestplace]} , $obj);
+        }
+    }
+
+    my $ret=0;
+    # count all rests but the last one
+    for my $bin (0..$#out-1) {
+        $ret+=($max-$bel[$bin]);
+    }
+    @{$target} = @out;
+    return $ret;
+}
+
+# First fit bin packing: every object goes into the first bin that still has
+# enough free space; a new bin is opened if it fits nowhere.
+# Parms: bin size (int), input array (arr reference), output array (arr reference)
+# The output array is overwritten with one array reference per bin, holding
+# the objects in the order they were added. The input array is not modified.
+# Returns: wasted space (int), the free space of all bins except the last one
+sub bp_firstfit {
+    my ($max, $in, $target) = @_;
+    my @out; # the bins
+    my @bel; # fill level of each bin
+
+    # all counters are local to this sub, no variable of the caller is touched
+    PIECE: foreach my $obj (@{$in}) {
+        # first fit, use the first bin with enough free space
+        for my $bin (0..$#out) {
+            my $newsize=($bel[$bin]+$obj);
+            if( $newsize <= $max ) {
+                # fits here
+                $bel[$bin]=$newsize;
+                push( @{$out[$bin]} , $obj);
+                next PIECE;
+            }
+        }
+        # no bin had enough room, open a new one
+        push(@bel, $obj);
+        push(@out, [$obj]);
+    }
+
+    my $ret=0;
+    # sum up all rests except of the one from the last bin
+    for my $bin (0..$#out-1) {
+        $ret+=($max-$bel[$bin]);
+    }
+    @{$target} = @out;
+    return $ret;
+}
diff --git a/3rd-party/dirsplit/dirsplit.1 b/3rd-party/dirsplit/dirsplit.1
new file mode 100644
index 0000000..76fdfa5
--- /dev/null
+++ b/3rd-party/dirsplit/dirsplit.1
@@ -0,0 +1,27 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.33.
+.TH DIRSPLIT "1" "March 2004" "dirsplit 0.3" "User Commands"
+.SH NAME
+dirsplit \- splits a directory into multiple directories of equal size
+.SH SYNOPSIS
+dirsplit \fB[options]\fR < directory | content-list-file >
+.SH DESCRIPTION
+dirsplit is designed for a simple purpose: it takes a directory with
+many files (each of which is smaller than a certain medium, e.g. a
+DVD) and "splits" it into "volumes", looking for the optimal order to
+get the best space/medium-number efficiency.
+.P
+The actual action is either adding the files to mkisofs catalogs or real
+moving of files into new directories (or creating links/symlinks).
+The method is not limited to files, whole directories can also be handled this
+way (see various filesystem exploration modes).
+.SH OPTIONS
+.TP
+Run dirsplit \fB\-h\fR to get the basic usage info.
+.TP
+Run dirsplit \fB\-H\fR to get the whole option overview and description.
+.SH EXAMPLES
+Run dirsplit \fB\-H\fR to see the commented examples.
+.SH AUTHOR
+\fBdirsplit\fR is created by Eduard Bloch (blade@debian.org) and is licensed
+under the GPLv2.
+