diff options
Diffstat (limited to '3rd-party/dirsplit')
-rw-r--r-- | 3rd-party/dirsplit/CMakeLists.txt | 3 | ||||
-rw-r--r-- | 3rd-party/dirsplit/ChangeLog | 27 | ||||
-rw-r--r-- | 3rd-party/dirsplit/README | 7 | ||||
-rwxr-xr-x | 3rd-party/dirsplit/dirsplit | 611 | ||||
-rw-r--r-- | 3rd-party/dirsplit/dirsplit.1 | 27 |
5 files changed, 675 insertions, 0 deletions
diff --git a/3rd-party/dirsplit/CMakeLists.txt b/3rd-party/dirsplit/CMakeLists.txt new file mode 100644 index 0000000..497774c --- /dev/null +++ b/3rd-party/dirsplit/CMakeLists.txt @@ -0,0 +1,3 @@ +PROJECT (DIRSPLIT C) +INSTALL(PROGRAMS dirsplit DESTINATION bin) +INSTALL(FILES dirsplit.1 DESTINATION share/man/man1) diff --git a/3rd-party/dirsplit/ChangeLog b/3rd-party/dirsplit/ChangeLog new file mode 100644 index 0000000..b5cd780 --- /dev/null +++ b/3rd-party/dirsplit/ChangeLog @@ -0,0 +1,27 @@ +*0.3.3: 2006/03 + +* code refactoring, much more readable now +* dropped the "du" exploration mode +* dropped the read-from-file mode and input "correct" kludge + * Use the new -T option to specify arbitrary input, and avoid dupes. + * Use the new option -F to follow symlinks +* option name fixes +* new option simple/stupid mode + * no space-efficiency optimisation, instead trying to store in alphabetic + order + +0.3.2: + +* cosmetic, correct usage and long help + +0.3.1: + +* proper fix for the = filenames + +0.3: + +* rewrite of some data input code, now using internal directory scanning +(exploration) to get data, not trusting du -a crap +* estimative calculation for filesystem overhead for directory entries +* workaround for = in filenames +* much, much more diff --git a/3rd-party/dirsplit/README b/3rd-party/dirsplit/README new file mode 100644 index 0000000..3867a12 --- /dev/null +++ b/3rd-party/dirsplit/README @@ -0,0 +1,7 @@ +Package: dirsplit +License: GPLv2 (until I change my mind) +Purpose: put files from a directory structure in subdirectories of specified +size or create catalogues for mkisofs so that generated volumes waste as little +space as possible +Algorithm: randomising FirstFit or BestFit +Usage: Self-explanatory diff --git a/3rd-party/dirsplit/dirsplit b/3rd-party/dirsplit/dirsplit new file mode 100755 index 0000000..1348976 --- /dev/null +++ b/3rd-party/dirsplit/dirsplit @@ -0,0 +1,611 @@ +#!/usr/bin/perl +# -*- Mode: Perl -*- +# dirsplit --- +# Author : 
# Eduard Bloch ( blade@debian.org )
# Last Modified On : Sun, 06 Feb 2005 14:59:51 +0100
# Status : Working, but use with caution!
# License: GPLv2

my $version = "0.3.3";

require v5.8.1;
use strict;
use List::Util 'shuffle';
use Getopt::Long qw(:config no_ignore_case bundling);
use File::Basename;
use File::Path;
use Cwd 'abs_path';

# Exit status handed to the final "exit $ret" of the main program.
my $ret = 0;

# Tunables; every one of them can be overridden through %options below.
my $max    = "4488M";    # -s: medium size, may carry a unit suffix (see fixnr)
my $prefix = "vol_";     # -p: first part of the catalog/directory names
my $acc    = 20;         # -a: number of packing attempts
my $emode  = 1;          # -e: directory exploration mode
my $bsize  = 2048;       # -b: block size used to round file sizes up
my $ofac   = 50;         # -o: per-character overhead factor for file names

# Plain flags and string options, undefined unless given on the command line.
my $opt_help;
my $opt_longhelp;
my $opt_sim;
my $opt_dir;
my $opt_flat;
my $opt_move;
my $opt_ver;
my $opt_sln;
my $opt_ln;
my $opt_filter;
my $opt_simple;
my $opt_follow;
my $get_ver;
my $opt_listfile;

# Getopt::Long specification: option spelling => variable receiving the value.
my %options = (
    "h|help"       => \$opt_help,
    "d|dirhier"    => \$opt_dir,
    "flat"         => \$opt_flat,
    "f|filter=s"   => \$opt_filter,
    "F|follow"     => \$opt_follow,
    "e|expmode=i"  => \$emode,
    "o|overhead=i" => \$ofac,
    "b|blksize=i"  => \$bsize,
    "n|no-act"     => \$opt_sim,
    "m|move"       => \$opt_move,
    "l|symlink"    => \$opt_sln,
    "L|hardlink"   => \$opt_ln,
    "v|verbose"    => \$opt_ver,
    "s|size=s"     => \$max,
    "S|simple"     => \$opt_simple,
    "T|input=s"    => \$opt_listfile,
    "p|prefix=s"   => \$prefix,
    "a|accuracy=i" => \$acc,
    "H|longhelp"   => \$opt_longhelp,
    "version"      => \$get_ver
);

# A command line that cannot be parsed is an error (status 1); help that was
# asked for explicitly is not, so it exits with status 0.
show_help(1) unless (GetOptions(%options));
show_help(0) if $opt_help;
# Called with parentheses so that the current @_ is not passed along.
show_longhelp() if $opt_longhelp;
if ($get_ver) {
    print "$version\n";
    exit 0;
}

# ignore the old dirhier setting since it is default now and disable the flag when opt_flat is specified
$opt_dir = !$opt_flat;

# A dry run is only useful when it prints what it would do.
$opt_ver  = 1 if $opt_sim;
# Both link modes reuse the code path of the move mode.
$opt_move = 1 if ($opt_sln || $opt_ln);

# big list @sizes containing the "items" (object sizes)
# %names hash mapping "items" (size as key) to arrays with filenames/subarrays for coalesced files
my @sizes;
my %names;

# result contains the calculated output. In simple mode, an
# array (bins) of atoms (files or filelists). Otherwise, sizes
# instead of atoms, to be resolved with %names.
my @result;

# Absolute path of the directory being split. Stays undefined when the
# input comes from a list file (-T).
my $inputdir;

# State of the simple (-S) bin filler in insitem(). Declared before the main
# program because the program exits before any later statement would run.
my $simplePos = 0;
my @simpleBinSizes;

$max = fixnr($max);
# about 400kB for iso headers
$max -= 420000;

# init default value
my $globwaste = 0;

# -d follows symlinks on its own, so a symlinked directory needs no extra
# readlink() test.
if (defined $ARGV[0] && -d $ARGV[0]) {
    syswrite(STDOUT, "Building file list, please wait...\n");
    # save the absolute path before doing anything
    $inputdir = Cwd::abs_path($ARGV[0]);
    explore($inputdir);
}
elsif ($opt_listfile) {
    if ($opt_listfile eq "-") {
        parseListe(\*STDIN);
    }
    else {
        open(my $in, "<", $opt_listfile) || die "Cannot open list file $opt_listfile\n";
        parseListe($in);
        close($in);
    }
}
else {
    die "Error: please specify a directory\n";
}

# check for pointless requests
my $testsize = 0;
for my $size (@sizes) {
    die "Too large object(s) ($size) for the given max size: @{$names{$size}} (maybe coalesced in arrays, check manually)\n" if ($size > $max);

    $testsize += $size;
}

$acc = 1 if ($testsize <= $max);    # just generate a list, more trials are pointless
$acc = 1 if ($acc < 1);             # at least one attempt, otherwise there is no result
print "\nSumm: $testsize\n" if ($opt_ver);
die "Nothing to do!\n" if ($testsize < 4096);    # looks like just an empty dir

if (!$opt_simple) {
    syswrite(STDOUT, "Calculating, please wait...\n");
    my $starttime = time;
    # upper bound, any real packing wastes less than this
    $globwaste = $max * @sizes;
    for my $round (1 .. $acc) {
        syswrite(STDOUT, ".");
        my @tmp;
        my $waste = bp_firstfit($max, \@sizes, \@tmp);
        if ($waste < $globwaste) {
            $globwaste = $waste;
            @result    = @tmp;
        }
        if ($starttime && time > $starttime + 10) {
            syswrite(STDOUT, "\nSpent already over 10s (for $round iterations)\nHint: reduce accuracy to make it faster!\n");
            # warn only once
            undef $starttime;
        }
        # first fit depends on the input order, so try another one next time
        @sizes = shuffle(@sizes);
    }
}

print "\nCalculated, using " . (scalar @result) . " volumes.\n";
print "Wasted: $globwaste Byte (estimated, check mkisofs -print-size ...)\n";

# and the real work
my $dirPrefix    = dirname($prefix);
my $prefixBase   = basename($prefix);
my $dirPrefixAbs = Cwd::abs_path($dirPrefix);

my $volume = 0;
for my $bin (@result) {
    $volume++;

    # catalogs are only written when neither moving/linking nor simulating
    my $catalog;
    my $listname = "$prefix$volume.list";
    if (!($opt_move || $opt_sim)) {
        open($catalog, ">", $listname) or die "Cannot write catalog $listname: $!\n";
    }

    for my $entry (@{$bin}) {

        # For simple mode, the files/atoms are already resolved, otherwise take
        # the next with appropriate size.
        my $item = $opt_simple ? $entry : shift(@{ $names{$entry} });

        # always work on a list of files, wrap a single file name if needed
        my $stuffRef = (ref($item) eq "ARRAY") ? $item : [$item];

        for my $file (@$stuffRef) {
            my $relFile = relative_name($file);
            my $base    = basename($relFile);
            if ($opt_move) {
                my $targetsubdir = $dirPrefixAbs . "/$prefixBase$volume";
                $targetsubdir .= "/" . dirname($relFile) if ($opt_dir);
                my $target = "$targetsubdir/$base";
                print "$file -> $target\n" if ($opt_ver);
                next if ($opt_sim);

                # mkpath() croaks on failure and returns an empty list when the
                # directory already exists, so its return value says nothing
                # about success; catch the exception instead.
                eval { mkpath($targetsubdir); 1 } or die "Problems creating $targetsubdir\n";
                # last check
                die "Could not create $targetsubdir?\n" if (!(-d $targetsubdir && -w $targetsubdir));

                my $done;
                if ($opt_sln) {
                    $done = symlink($file, $target);
                }
                elsif ($opt_ln) {
                    if (-d $file && !-l $file) {
                        # directories cannot be hardlinked, recreate them
                        $done = mkdir($target) || -d $target;
                    }
                    else {
                        $done = link($file, $target);
                    }
                }
                else {
                    $done = rename($file, $target);
                }
                if (!$done) {
                    # keep going with the other files but report the failure
                    warn "Could not create $target: $!\n";
                    $ret = 1;
                }
            }
            else {
                # escape = in mkisofs catalogs, they are used as separator
                my $isoname = ($opt_dir ? $relFile : $base);
                $isoname =~ s/=/\\=/g;
                my $sourcefile = $file;
                $sourcefile =~ s/=/\\=/g;
                print "$volume: /$isoname=$sourcefile\n" if $opt_ver;
                print {$catalog} "/$isoname=$sourcefile\n" if (!$opt_sim);
            }
        }
    }
    if ($catalog) {
        # buffered write errors only show up on close
        close($catalog) or die "Error writing catalog $listname: $!\n";
    }
}

exit $ret;

# Name of a file relative to the input directory.
# parameter: file name as stored by insitem
# With a directory argument the absolute directory prefix is cut off. In list
# mode (-T) there is no such prefix, so only leading "/" and "./" are dropped.
sub relative_name {
    my ($file) = @_;
    if (defined $inputdir) {
        my $skip = length($inputdir) + 1;
        return ($skip <= length($file)) ? substr($file, $skip) : "";
    }
    (my $rel = $file) =~ s{^(?:\.?/)+}{};
    return $rel;
}

# recursive function
# parameter: directory
# mode 1: descend as far as possible and index all non-directories
# mode 2++:
# put all files of a dir into coalesced-object, then descend into each dir
sub explore {
    my ($dir) = @_;
    my @dirs;
    my @files;

    opendir(my $dh, $dir) || die "Could not open $dir\n";
    my @stuff = readdir($dh);
    closedir($dh);

    if ($opt_simple) {
        @stuff = sort { lc($a) cmp lc($b) } @stuff;
    }

    foreach my $name (@stuff) {
        next if ($name eq "." || $name eq "..");

        # the filter expression below refers to this variable by name
        my $f = ($dir ne ".") ? "$dir/$name" : $name;

        if ($opt_filter) {
            # NOTE: the -f argument is evaluated as Perl code. This is fine
            # for an expression typed by the user, never feed it untrusted
            # input.
            next unless (eval("\$f=~$opt_filter;"));
        }

        if (-l $f && !$opt_follow) {
            # symlinks are treated like plain files unless -F is given
            push(@files, $f);
        }
        elsif (-d $f) {
            push(@dirs, $f);
        }
        else {
            push(@files, $f);
        }
    }

    if ((@dirs + @files) == 0) {
        # this one is empty, register for cosmetics reason
        insitem(getsize($dir), $dir);
        return;
    }

    # recurse on directories
    explore($_) for (@dirs);

    # and now process files
    if ($emode == 1) {
        insitem(getsize($_), $_) for (@files);
        return;
    }

    # handle coalesced objects - first some sanity checks and splitting if
    # required
    my $filesum = 0;
    for my $file (@files) {
        my $size = getsize($file);
        if ($size > $max) {
            # already too large, stop right here
            die "Too large file ($file) for the given max size $max, aborting...\n";
        }
        $filesum += $size;
    }

    # handle coal. objects becoming too large
    if ($filesum > $max) {
        if ($emode == 3) {
            # don't coalesce in this mode, do like mode 1 above, leave them alone
            insitem(getsize($_), $_) for (@files);
            return;
        }
        if ($emode == 4) {
            # split the sorted file set into groups that fit on a medium.
            # Every group gets its own array, and the last (partial) group is
            # registered as well.
            my $partsum = 0;
            my @group;
            for my $file (sort(@files)) {
                my $size = getsize($file);
                if (@group && $partsum + $size > $max) {
                    insitem($partsum, [@group]);
                    @group   = ();
                    $partsum = 0;
                }
                push(@group, $file);
                $partsum += $size;
            }
            insitem($partsum, [@group]) if (@group);
            return;
        }
    }

    # ok, building a coalesced object for simple cases
    if ($filesum) {
        insitem($filesum, \@files);
    }
}

# Register an object for the calculation.
# args: size, object (filename or list reference)
sub insitem {
    my ($size, $object) = @_;

    # normally, put the items into the pool for calculation. In simple mode, calculate here
    push(@sizes, $size);
    push(@{ $names{$size} }, $object);

    if ($opt_simple) {
        # now the simplest method to fill the bins, just take a new one when the
        # object-to-be-added no longer fits
        my $used = $simpleBinSizes[$simplePos] || 0;
        if ($used + $size > $max) {
            $globwaste += ($max - $used);
            $simplePos++;
        }
        $simpleBinSizes[$simplePos] += $size;
        push(@{ $result[$simplePos] }, $object);
    }
}

# Estimated size of a file on the medium.
# parameter: file name
# returns: file size rounded up to whole blocks (-b) plus an overhead that
# grows with the length of the base name (-o)
sub getsize {
    my ($file) = @_;
    my $size = (stat($file))[7];
    # stat fails on e.g. dangling symlinks, count those as empty
    $size = 0 unless defined $size;
    if ($bsize > 0) {
        my $rest = ($size % $bsize);
        $size = ($size + $bsize - $rest) if ($rest);
    }
    return 1 + int(200 + $ofac * length(basename($file)) + $size);
}

# Read "SIZE PATH" lines from a file handle and register each one as an item.
# parameter: file handle (glob reference or lexical handle)
sub parseListe {
    my ($fh) = @_;
    while (my $line = <$fh>) {
        if ($line =~ /^(\w+)\s+(.+)/) {
            # copy the captures, fixnr runs a pattern match of its own
            my ($size, $path) = ($1, $2);
            insitem(fixnr($size), $path);
        }
    }
}

# Convert a number with unit suffix into a plain number.
# args:
# Number
# optional: default multiplier
# returns: the number multiplied with the factor of its suffix; the argument
# itself if it has no suffix and no default multiplier was given
# dies on an unknown multiplier
sub fixnr {
    my ($value, $default_fac) = @_;
    my $fac;
    my $nr;
    if (defined($value) && $value =~ /(\d+)(\D)/) {
        $nr  = $1;
        $fac = $2;
    }
    elsif (defined($default_fac)) {
        $nr  = $value;
        $fac = $default_fac;
    }
    else {
        return $value;
    }

    # lower case letters are decimal units, upper case letters binary ones
    my %factor_of = (
        "g" => 1000000000,
        "G" => 1073741824,
        "m" => 1000000,
        "M" => 1048576,
        "k" => 1000,
        "K" => 1024,
        "b" => 1,
    );
    return $nr * $factor_of{$fac} if (exists $factor_of{$fac});
    die "$fac is not a valid multiplier!";
}

# Print the short usage message and leave.
# parameter: exit status
sub show_help {
    my ($status) = @_;
    print <<EOM
dirsplit [options] [advanced options] < directory >

 -H|--longhelp Show the long help message with more advanced options
 -n|--no-act   Only print the commands, no action (implies -v)
 -s|--size     NUMBER - Size of the medium (default: $max)
 -e|--expmode  NUMBER - directory exploration mode (recommended, see long help)
 -m|--move     Move files to target dirs (default: create mkisofs catalogs)
 -p|--prefix   STRING - first part of catalog/directory name (default: vol_)
 -h|--help     Show this option summary
 -v|--verbose  More verbosity

The complete help can be displayed with the --longhelp (-H) option.
The default mode is creating file catalogs useable with:
    mkisofs -D -r --joliet-long -graft-points -path-list CATALOG

Example:
dirsplit -m -s 700M -e2 random_data_to_backup/
EOM
      ;
    exit($status || 0);
}

# Print the complete option description with examples and leave with status 0.
sub show_longhelp {
    my $msglong = "
dirsplit [options] [advanced options] < directory >
 -n|--no-act   Only print the commands, no action (implies -v)
 -s|--size     NUMBER - Size of the medium (default: $max)
 -m|--move     Move files to target dirs (default: create mkisofs catalogs)
 -l|--symlink  similar to -m but just creates symlinks in the target dirs
 -L|--hardlink like -l but creates hardlinks
 -p|--prefix   STRING - first part of catalog/directory name (default: vol_)
 -f|--filter   EXPR - Filter expression, see examples below and perlre manpage
 --flat        Flat dir mode, don't recreate subdirectory structure (not recommended)
 -e|--expmode  NUMBER, special exploration modes, used with directory argument

  1: (default) native exploration of the specified directory, but file sizes
     are rounded up to 2048 blocks plus estimated overhead for
     filenames (see -o option)
  2: like 1, but all files in directory are put together (as \"atom\") onto the
     same medium. This does not apply to subdirectories, however.
  3: like 2, but don't coalesc files when the size of the \"atom\" becomes too
     large for the medium size (currently $max)
  4: like 2, but the max. size of the atoms is limited to $max (storing the
     rest on another medium)

 -F|--follow   Follow symlinks. Use with care!
 -b|--blksize  NUMBER, block size of the target filesystem (currently $bsize).
 -o|--overhead NUMBER, overhead caused by directory entries (as factor for the
               filename length, default: 50, empiricaly found for Joliet+RR
               with not-so-deep directory structure). Works in exploration
               mode.
 -a|--accuracy NUMBER (1=faster, large number=better efficiency, default: 20)
 -S|--simple   Simple/stupid/alphabetic mode
 -T|--input    FILENAME (or - for STDIN): List with sizes and paths, try:
               find dir -type f -printf \"%s %p\\n\"
               to get an example. Avoid duplicates! Unit suffixes are allowed.
 -h|--help     Show this option summary
 -v|--verbose  More verbosity

File sizes are expected to be in bytes, append modifier letters to multiply
with a factor, eg 200M (b,k,K,m,M,g,G for Bytes, Kb, KiB, Mb, MiB, Gb, GiB).
The default output mode is creating file catalogs useable with
    mkisofs -D -r --joliet-long -graft-points -path-list CATALOG

Examples:
dirsplit -m -s 120M -e4 largedirwithdata/ -p /zipmedia/backup_ #move stuff into splitted backup dirs
dirsplit -s 700M -e2 music/ # make mkisofs catalogs to burn all music to 700M CDRs, keep single files in each dir together
dirsplit -s 700M -e2 -f '/other\\/Soundtracks/' music/ # like above, only take files from other/Soundtracks
dirsplit -s 700M -e2 -f '!/Thumbs.db|Desktop.ini|\\.m3u\$/i' # like above, ignore some junk files and playlists, both letter cases

Bugs: overhead trough blocksize alignment and directory entry storage varies,
heavily depends on the target filesystem and configuration (see -b and -o).

You should compare the required size of the created catalogs, eg.:
for x in *list ; do mkisofs -quiet -D -r --joliet-long -graft-points \\
 -path-list \$x -print-size; done
(output in blocks of 2048 bytes) with the expected size (-s) and media data
(cdrecord -v -toc ...).
";
    print $msglong;
    exit 0;
}

# Best fit: every object goes into the bin where it leaves the least room.
# Parms: bin size (int), input array (arr reference), output array (arr reference)
# Returns: wasted space (int)
sub bp_bestfit {
    my ($max, $in, $target) = @_;
    my @out;    # bins, each an array of object sizes
    my @bel;    # fill level of each bin

    for my $obj (@{$in}) {
        my $bestplace = scalar @out;    # index past the end means "open a new bin"
        my $bestwert  = $max;
        for my $idx (0 .. $#out) {
            my $rest = $max - $bel[$idx] - $obj;
            # an exact fit (rest of 0) is the best possible place
            if ($rest >= 0 && $rest < $bestwert) {
                $bestplace = $idx;
                $bestwert  = $rest;
            }
        }
        if ($bestplace > $#out) {
            push(@bel, $obj);
            push(@out, [$obj]);
        }
        else {
            $bel[$bestplace] += $obj;
            push(@{ $out[$bestplace] }, $obj);
        }
    }

    my $ret = 0;
    # count all rests but the last one
    for my $idx (0 .. $#out - 1) {
        $ret += ($max - $bel[$idx]);
    }
    @{$target} = @out;
    return $ret;
}

# First fit: every object goes into the first bin with enough free space.
# Parms: bin size (int), input array (arr reference), output array (arr reference)
# Returns: wasted space (int)
sub bp_firstfit {
    my ($max, $in, $target) = @_;
    my @out;    # bins, each an array of object sizes
    my @bel;    # fill level of each bin

    OBJECT: foreach my $obj (@{$in}) {
        for my $idx (0 .. $#out) {
            my $newsize = ($bel[$idx] + $obj);
            if ($newsize <= $max) {
                # fits here
                $bel[$idx] = $newsize;
                push(@{ $out[$idx] }, $obj);
                next OBJECT;
            }
        }
        # new bin
        push(@bel, $obj);
        push(@out, [$obj]);
    }

    my $ret = 0;
    # sum up all rests except of the one from the last bin
    for my $idx (0 .. $#out - 1) {
        $ret += ($max - $bel[$idx]);
    }
    @{$target} = @out;
    return $ret;
}

# What follows is the next hunk of the commit diff (the man page), which shares
# its source line with the end of the script. It is kept verbatim and is not
# part of the program.
__END__
diff --git a/3rd-party/dirsplit/dirsplit.1 b/3rd-party/dirsplit/dirsplit.1 new file mode 100644 index 0000000..76fdfa5 --- /dev/null +++ b/3rd-party/dirsplit/dirsplit.1 @@ -0,0 +1,27 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.33. +.TH DIRSPLIT "1" "March 2004" "dirsplit 0.3" "User Commands" +.SH NAME +dirsplit \- splits directory into multiple with equal size +.SH SYNOPSIS +dirsplit \fB[options]\fR < directory | content-list-file > +.SH DESCRIPTION +displit is designed to for a simple purpose: convert a directory with +many multiple files (which are all smaller than a certain medium, eg. +DVD) and "splits" it into "volumes", looking for the optimal order to +get the best space/medium-number efficiency. +.P +The actual action is either adding the files to mkisofs catalogs or real +moving of files into new directories (or creating links/symlinks). +The method is not limited to files, whole directories can also be handled this +way (see various filesystem exploration modes). +.SH OPTIONS +.TP +Run dirsplit \fB\-h\fR to get the basic usage info. +.TP +Run dirsplit \fB\-H\fR to get the whole option overview and description. +.SH EXAMPLES +Run dirsplit \fB\-H\fR to see the commented examples. +.SH AUTHOR +\fBdirsplit\fR is created by Eduard Bloch (blade@debian.org) and is licensed +under the GPLv2. + |