diff options
Diffstat (limited to 'ipl/progs/mtf3.icn')
-rw-r--r-- | ipl/progs/mtf3.icn | 536 |
1 files changed, 536 insertions, 0 deletions
diff --git a/ipl/progs/mtf3.icn b/ipl/progs/mtf3.icn new file mode 100644 index 0000000..8ebca4e --- /dev/null +++ b/ipl/progs/mtf3.icn @@ -0,0 +1,536 @@ +############################################################################ +# +# File: mtf3.icn +# +# Subject: Program to map tar file +# +# Author: Richard Goerwitz +# +# Date: June 3, 1991 +# +############################################################################ +# +# This file is in the public domain. +# +############################################################################ +# +# Version: 3.4 +# +############################################################################ +# +# PURPOSE: Maps 15+ char. filenames in a tar archive to 14 chars. +# Handles both header blocks and the archive itself. Mtf is intended +# to facilitate installation of tar'd archives on systems subject to +# the System V 14-character filename limit. +# +# USAGE: mtf inputfile [-r reportfile] [-e .extensions] [-x exceptions] +# +# "Inputfile" is a tar archive. "Reportfile" is file containing a +# list of files already mapped by mtf in a previous run (used to +# avoid clashes with filenames in use outside the current archive). +# The -e switch precedes a list of filename .extensions which mtf is +# supposed to leave unscathed by the mapping process +# (single-character extensions such as .c and .o are automatically +# preserved; -e allows the user to specify additional extensions, +# such as .pxl, .cpi, and .icn). The final switch, -x, precedes a +# list of strings which should not be mapped at all. Use this switch +# if, say, you have a C file with a structure.field combination such +# as "thisisveryverybig.hashptr" in an archive that contains a file +# called "thisisveryverybig.h," and you want to avoid mapping that +# portion of the struct name which matches the name of the overlong +# file (to wit, "mtf inputfile -x thisisveryverybig.hashptr"). To +# prevent mapping of any string (including overlong filenames) begin- +# ning, say, with "thisisvery," use "mtf inputfile -x thisisvery." +# Be careful with this option, or you might end up defeating the +# whole point of using mtf in the first place. +# +# OUTPUT FORMAT: Mtf writes a mapped tar archive to the stdout. +# When finished, it leaves a file called "map.report" in the current +# directory which records what filenames were mapped and how. Rename +# and save this file, and use it as the "reportfile" argument to any +# subsequent runs of mtf in this same directory. Even if you don't +# plan to run mtf again, this file should still be examined, just to +# be sure that the new filenames are acceptable, and to see if +# perhaps additional .extensions and/or exceptions should be +# specified. +# +# BUGS: Mtf only maps filenames found in the main tar headers. +# Because of this, mtf cannot accept nested tar archives. If you try +# to map a tar archive within a tar file, mtf will abort with a nasty +# message about screwing up your files. Please note that, unless you +# give mtf a "reportfile" to consider, it knows nothing about files +# existing outside the archive. Hence, if an input archive refers to +# an overlong filename in another archive, mtf naturally will not +# know to shorten it. Mtf will, in fact, have no way of knowing that +# it is a filename, and not, say, an identifier in a C program. +# Final word of caution: Try not to use mtf on binaries. It cannot +# possibly preserve the correct format and alignment of strings in an +# executable. Same goes for compressed files. Mtf can't map +# filenames that it can't read! +# +############################################################################ + + +global filenametbl, chunkset, short_chunkset # see procedure mappiece(s) +global extensions, no_nos # ditto + +record hblock(name,junk,size,mtime,chksum, # tar header struct; + linkflag,linkname,therest) # see readtarhdr(s) + + +procedure main(a) + local usage, intext, i, current_list + + usage := "usage: mtf inputfile [-r reportfile] " || + "[-e .extensions] [-x exceptions]" + + *a = 0 & stop(usage) + + intext := open_input_file(a[1]) & pop(a) + + i := 0 + extensions := []; no_nos := [] + while (i +:= 1) <= *a do { + case a[i] of { + "-r" : readin_old_map_report(a[i+:=1]) + "-e" : current_list := extensions + "-x" : current_list := no_nos + default : put(current_list,a[i]) + } + } + + every !extensions ?:= (=".", tab(0)) + + # Run through all the headers in the input file, filling + # (global) filenametbl with the names of overlong files; + # make_table_of_filenames fails if there are no such files. + make_table_of_filenames(intext) | { + write(&errout,"mtf: no overlong path names to map") + a[1] ? (tab(find(".tar")+4), pos(0)) | + write(&errout,"(Is ",a[1]," even a tar archive?)") + exit(1) + } + + # Now that a table of overlong filenames exists, go back + # through the text, remapping all occurrences of these names + # to new, 14-char values; also, reset header checksums, and + # reformat text into correctly padded 512-byte blocks. Ter- + # minate output with 512 nulls. + seek(intext,1) + every writes(output_mapped_headers_and_texts(intext)) + + close(intext) + write_report() # Record mapped file and dir names for future ref. + exit(0) + +end + + + +procedure open_input_file(s) + local intext + + intext := open("" ~== s,"r") | + stop("mtf: can't open ",s) + find("UNIX",&features) | + stop("mtf: I'm not tested on non-UNIX systems.") + s[-2:0] == ".Z" & + stop("mtf: sorry, can't accept compressed files") + return intext +end + + + +procedure readin_old_map_report(s) + local mapfile, line, chunk, lchunk + + initial { + filenametbl := table() + chunkset := set() + short_chunkset := set() + } + + mapfile := open_input_file(s) + while line := read(mapfile) do { + line ? { + if chunk := tab(many(~' \t')) & tab(upto(~' \t')) & + lchunk := move(14) & pos(0) then { + filenametbl[chunk] := lchunk + insert(chunkset,chunk) + insert(short_chunkset,chunk[1:16]) + } + if /chunk | /lchunk + then stop("mtf: report file, ",s," seems mangled.") + } + } + +end + + + +procedure make_table_of_filenames(intext) + + local header # chunkset is global + + # search headers for overlong filenames; for now + # ignore everything else + while header := readtarhdr(reads(intext,512)) do { + # tab upto the next header block + tab_nxt_hdr(intext,trim_str(header.size),1) + # record overlong filenames in several global tables, sets + fixpath(trim_str(header.name)) + } + *\chunkset ~= 0 | fail + return &null + +end + + + +procedure output_mapped_headers_and_texts(intext) + + # Remember that filenametbl, chunkset, and short_chunkset + # (which are used by various procedures below) are global. + local header, newtext, full_block, block, lastblock + + # Read in headers, one at a time. + while header := readtarhdr(reads(intext,512)) do { + + # Replace overlong filenames with shorter ones, according to + # the conversions specified in the global hash table filenametbl + # (which were generated by fixpath() on the first pass). + header.name := left(map_filenams(header.name),100,"\x00") + header.linkname := left(map_filenams(header.linkname),100,"\x00") + + # Use header.size field to determine the size of the subsequent text. + # Read in the text as one string. Map overlong filenames found in it + # to shorter names as specified in the global hash table filenamtbl. + newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size))) + + # Now, find the length of newtext, and insert it into the size field. + header.size := right(exbase10(*newtext,8) || " ",12," ") + + # Calculate the checksum of the newly retouched header. + header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ") + + # Finally, join all the header fields into a new block and write it out + full_block := ""; every full_block ||:= !header + suspend left(full_block,512,"\x00") + + # Now we're ready to write out the text, padding the final block + # out to an even 512 bytes if necessary; the next header must start + # right at the beginning of a 512-byte block. + newtext ? { + while block := move(512) + do suspend block + pos(0) & next + lastblock := left(tab(0),512,"\x00") + suspend lastblock + } + } + # Write out a final null-filled block. Some tar programs will write + # out 1024 nulls at the end. Dunno why. + return repl("\x00",512) + +end + + + +procedure trim_str(s) + + # Knock out spaces, nulls from those crazy tar header + # block fields (some of which end in a space and a null, + # some just a space, and some just a null [anyone know + # why?]). + return s ? { + (tab(many(' ')) | &null) & + trim(tab(find("\x00")|0)) + } + +end + + + +procedure tab_nxt_hdr(f,size_str,firstpass) + + # Tab upto the next header block. Return the bypassed text + # as a string if not the first pass. + + local hs, next_header_offset + + hs := integer("8r" || size_str) + next_header_offset := (hs / 512) * 512 + hs % 512 ~= 0 & next_header_offset +:= 512 + if 0 = next_header_offset then return "" + else { + # if this is pass no. 1 don't bother returning a value; we're + # just collecting long filenames; + if \firstpass then { + seek(f,where(f)+next_header_offset) + return + } + else { + return reads(f,next_header_offset)[1:hs+1] | + stop("mtf: error reading in ", + string(next_header_offset)," bytes.") + } + } + +end + + + +procedure fixpath(s) + local s2, piece + + # Fixpath is a misnomer of sorts, since it is used on + # the first pass only, and merely examines each filename + # in a path, using the procedure mappiece to record any + # overlong ones in the global table filenametbl and in + # the global sets chunkset and short_chunkset; no fixing + # is actually done here. + + s2 := "" + s ? { + while piece := tab(find("/")+1) + do s2 ||:= mappiece(piece) + s2 ||:= mappiece(tab(0)) + } + return s2 + +end + + + +procedure mappiece(s) + local chunk, i, lchunk + + # Check s (the name of a file or dir as recorded in the tar header + # being examined) to see if it is over 14 chars long. If so, + # generate a unique 14-char version of the name, and store + # both values in the global hashtable filenametbl. Also store + # the original (overlong) file name in chunkset. Store the + # first fifteen chars of the original file name in short_chunkset. + # Sorry about all of the tables and sets. It actually makes for + # a reasonably efficient program. Doing away with both sets, + # while possible, causes a tenfold drop in execution speed! + + # global filenametbl, chunkset, short_chunkset, extensions + local j, ending + + initial { + /filenametbl := table() + /chunkset := set() + /short_chunkset := set() + } + + chunk := trim(s,'/') + if chunk ? (tab(find(".tar")+4), pos(0)) then { + write(&errout, "mtf: Sorry, I can't let you do this.\n", + " You've nested a tar archive within\n", + " another tar archive, which makes it\n", + " likely I'll f your filenames ubar.") + exit(2) + } + if *chunk > 14 then { + i := 0 + + if /filenametbl[chunk] then { + # if we have not seen this file, then... + repeat { + # ...find a new unique 14-character name for it; + # preserve important suffixes like ".Z," ".c," etc. + # First, check to see if the original filename (chunk) + # ends in an important extension... + if chunk ? + (tab(find(".")), + ending := move(1) || tab(match(!extensions)|any(&ascii)), + pos(0) + ) + # ...If so, then leave the extension alone; mess with the + # middle part of the filename (e.g. file.with.extension.c -> + # file.with001.c). + then { + j := (15 - *ending - 3) + lchunk:= chunk[1:j] || right(string(i+:=1),3,"0") || ending + } + # If no important extension is present, then reformat the + # end of the file (e.g. too.long.file.name -> too.long.fi01). + else lchunk := chunk[1:13] || right(string(i+:=1),2,"0") + + # If the resulting shorter file name has already been used... + if lchunk == !filenametbl + # ...then go back and find another (i.e. increment i & try + # again; else break from the repeat loop, and... + then next else break + } + # ...record both the old filename (chunk) and its new, + # mapped name (lchunk) in filenametbl. Also record the + # mapped names in chunkset and short_chunkset. + filenametbl[chunk] := lchunk + insert(chunkset,chunk) + insert(short_chunkset,chunk[1:16]) + } + } + + # If the filename is overlong, return lchunk (the shortened + # name), else return the original name (chunk). If the name, + # as passed to the current function, contained a trailing / + # (i.e. if s[-1]=="/"), then put the / back. This could be + # done more elegantly. + return (\lchunk | chunk) || ((s[-1] == "/") | "") + +end + + + +procedure readtarhdr(s) + local this_block + + # Read the silly tar header into a record. Note that, as was + # complained about above, some of the fields end in a null, some + # in a space, and some in a space and a null. The procedure + # trim_str() may (and in fact often _is_) used to remove this + # extra garbage. + + this_block := hblock() + s ? { + this_block.name := move(100) # <- to be looked at later + this_block.junk := move(8+8+8) # skip the permissions, uid, etc. + this_block.size := move(12) # <- to be looked at later + this_block.mtime := move(12) + this_block.chksum := move(8) # <- to be looked at later + this_block.linkflag := move(1) + this_block.linkname := move(100) # <- to be looked at later + this_block.therest := tab(0) + } + integer(this_block.size) | fail # If it's not an integer, we've hit + # the final (null-filled) block. + return this_block + +end + + + +procedure map_filenams(s) + local el, ch + + # Chunkset is global, and contains all the overlong filenames + # found in the first pass through the input file; here the aim + # is to map these filenames to the shortened variants as stored + # in filenametbl (GLOBAL). + + local s2, tmp_chunk_tbl, tmp_lst + static new_chunklist + initial { + + # Make sure filenames are sorted, longest first. Say we + # have a file called long_file_name_here.1 and one called + # long_file_name_here.1a. We want to check for the longer + # one first. Otherwise the portion of the second file which + # matches the first file will get remapped. + tmp_chunk_tbl := table() + every el := !chunkset + do insert(tmp_chunk_tbl,el,*el) + tmp_lst := sort(tmp_chunk_tbl,4) + new_chunklist := list() + every put(new_chunklist,tmp_lst[*tmp_lst-1 to 1 by -2]) + + } + + s2 := "" + s ? { + until pos(0) do { + # first narrow the possibilities, using short_chunkset + if member(short_chunkset,&subject[&pos:&pos+15]) + # then try to map from a long to a shorter 14-char filename + then { + if match(ch := !new_chunklist) & not match(!no_nos) + then s2 ||:= filenametbl[=ch] + else s2 ||:= move(1) + } + else s2 ||:= move(1) + } + } + return s2 + +end + + +# From the IPL. Thanks, Ralph - +# Author: Ralph E. Griswold +# Date: June 10, 1988 +# exbase10(i,j) convert base-10 integer i to base j +# The maximum base allowed is 36. + +procedure exbase10(i,j) + + static digits + local s, d, sign + initial digits := &digits || &lcase + if i = 0 then return 0 + if i < 0 then { + sign := "-" + i := -i + } + else sign := "" + s := "" + while i > 0 do { + d := i % j + if d > 9 then d := digits[d + 1] + s := d || s + i /:= j + } + return sign || s + +end + +# end IPL material + + +procedure get_checksum(r) + local sum, field + + # Calculates the new value of the checksum field for the + # current header block. Note that the specification say + # that, when calculating this value, the chksum field must + # be blank-filled. + + sum := 0 + r.chksum := " " + every field := !r + do every sum +:= ord(!field) + return sum + +end + + + +procedure write_report() + + # This procedure writes out a list of filenames which were + # remapped (because they exceeded the SysV 14-char limit), + # and then notifies the user of the existence of this file. + + local outtext, stbl, i, j, mapfile_name + + # Get a unique name for the map.report (thereby preventing + # us from overwriting an older one). + mapfile_name := "map.report"; j := 1 + until not close(open(mapfile_name,"r")) + do mapfile_name := (mapfile_name[1:11] || string(j+:=1)) + + (outtext := open(mapfile_name,"w")) | + open(mapfile_name := "/tmp/map.report","w") | + stop("mtf: Can't find a place to put map.report!") + stbl := sort(filenametbl,3) + every i := 1 to *stbl -1 by 2 do { + match(!no_nos,stbl[i]) | + write(outtext,left(stbl[i],35," ")," ",stbl[i+1]) + } + write(&errout,"\nmtf: ",mapfile_name," contains the list of changes.") + write(&errout," Please save this list!") + close(outtext) + return &null + +end |