summaryrefslogtreecommitdiff
path: root/ipl/progs/mtf3.icn
diff options
context:
space:
mode:
Diffstat (limited to 'ipl/progs/mtf3.icn')
-rw-r--r--ipl/progs/mtf3.icn536
1 files changed, 536 insertions, 0 deletions
diff --git a/ipl/progs/mtf3.icn b/ipl/progs/mtf3.icn
new file mode 100644
index 0000000..8ebca4e
--- /dev/null
+++ b/ipl/progs/mtf3.icn
@@ -0,0 +1,536 @@
+############################################################################
+#
+# File: mtf3.icn
+#
+# Subject: Program to map tar file
+#
+# Author: Richard Goerwitz
+#
+# Date: June 3, 1991
+#
+############################################################################
+#
+# This file is in the public domain.
+#
+############################################################################
+#
+# Version: 3.4
+#
+############################################################################
+#
+# PURPOSE: Maps 15+ char. filenames in a tar archive to 14 chars.
+# Handles both header blocks and the archive itself. Mtf is intended
+# to facilitate installation of tar'd archives on systems subject to
+# the System V 14-character filename limit.
+#
+# USAGE: mtf inputfile [-r reportfile] [-e .extensions] [-x exceptions]
+#
+# "Inputfile" is a tar archive. "Reportfile" is file containing a
+# list of files already mapped by mtf in a previous run (used to
+# avoid clashes with filenames in use outside the current archive).
+# The -e switch precedes a list of filename .extensions which mtf is
+# supposed to leave unscathed by the mapping process
+# (single-character extensions such as .c and .o are automatically
+# preserved; -e allows the user to specify additional extensions,
+# such as .pxl, .cpi, and .icn). The final switch, -x, precedes a
+# list of strings which should not be mapped at all. Use this switch
+# if, say, you have a C file with a structure.field combination such
+# as "thisisveryverybig.hashptr" in an archive that contains a file
+# called "thisisveryverybig.h," and you want to avoid mapping that
+# portion of the struct name which matches the name of the overlong
+# file (to wit, "mtf inputfile -x thisisveryverybig.hashptr"). To
+# prevent mapping of any string (including overlong filenames) begin-
+# ning, say, with "thisisvery," use "mtf inputfile -x thisisvery."
+# Be careful with this option, or you might end up defeating the
+# whole point of using mtf in the first place.
+#
+# OUTPUT FORMAT: Mtf writes a mapped tar archive to the stdout.
+# When finished, it leaves a file called "map.report" in the current
+# directory which records what filenames were mapped and how. Rename
+# and save this file, and use it as the "reportfile" argument to any
+# subsequent runs of mtf in this same directory. Even if you don't
+# plan to run mtf again, this file should still be examined, just to
+# be sure that the new filenames are acceptable, and to see if
+# perhaps additional .extensions and/or exceptions should be
+# specified.
+#
+# BUGS: Mtf only maps filenames found in the main tar headers.
+# Because of this, mtf cannot accept nested tar archives. If you try
+# to map a tar archive within a tar file, mtf will abort with a nasty
+# message about screwing up your files. Please note that, unless you
+# give mtf a "reportfile" to consider, it knows nothing about files
+# existing outside the archive. Hence, if an input archive refers to
+# an overlong filename in another archive, mtf naturally will not
+# know to shorten it. Mtf will, in fact, have no way of knowing that
+# it is a filename, and not, say, an identifier in a C program.
+# Final word of caution: Try not to use mtf on binaries. It cannot
+# possibly preserve the correct format and alignment of strings in an
+# executable. Same goes for compressed files. Mtf can't map
+# filenames that it can't read!
+#
+############################################################################
+
+
+global filenametbl, chunkset, short_chunkset # see procedure mappiece(s)
+global extensions, no_nos # ditto
+
+record hblock(name,junk,size,mtime,chksum, # tar header struct;
+ linkflag,linkname,therest) # see readtarhdr(s)
+
+
+procedure main(a)
+ local usage, intext, i, current_list
+
+ usage := "usage: mtf inputfile [-r reportfile] " ||
+ "[-e .extensions] [-x exceptions]"
+
+ *a = 0 & stop(usage)
+
+ intext := open_input_file(a[1]) & pop(a)
+
+ i := 0
+ extensions := []; no_nos := []
+ while (i +:= 1) <= *a do {
+ case a[i] of {
+ "-r" : readin_old_map_report(a[i+:=1])
+ "-e" : current_list := extensions
+ "-x" : current_list := no_nos
+ default : put(current_list,a[i])
+ }
+ }
+
+ every !extensions ?:= (=".", tab(0))
+
+ # Run through all the headers in the input file, filling
+ # (global) filenametbl with the names of overlong files;
+ # make_table_of_filenames fails if there are no such files.
+ make_table_of_filenames(intext) | {
+ write(&errout,"mtf: no overlong path names to map")
+ a[1] ? (tab(find(".tar")+4), pos(0)) |
+ write(&errout,"(Is ",a[1]," even a tar archive?)")
+ exit(1)
+ }
+
+ # Now that a table of overlong filenames exists, go back
+ # through the text, remapping all occurrences of these names
+ # to new, 14-char values; also, reset header checksums, and
+ # reformat text into correctly padded 512-byte blocks. Ter-
+ # minate output with 512 nulls.
+ seek(intext,1)
+ every writes(output_mapped_headers_and_texts(intext))
+
+ close(intext)
+ write_report() # Record mapped file and dir names for future ref.
+ exit(0)
+
+end
+
+
+
+procedure open_input_file(s)
+ local intext
+
+ intext := open("" ~== s,"r") |
+ stop("mtf: can't open ",s)
+ find("UNIX",&features) |
+ stop("mtf: I'm not tested on non-UNIX systems.")
+ s[-2:0] == ".Z" &
+ stop("mtf: sorry, can't accept compressed files")
+ return intext
+end
+
+
+
+procedure readin_old_map_report(s)
+ local mapfile, line, chunk, lchunk
+
+ initial {
+ filenametbl := table()
+ chunkset := set()
+ short_chunkset := set()
+ }
+
+ mapfile := open_input_file(s)
+ while line := read(mapfile) do {
+ line ? {
+ if chunk := tab(many(~' \t')) & tab(upto(~' \t')) &
+ lchunk := move(14) & pos(0) then {
+ filenametbl[chunk] := lchunk
+ insert(chunkset,chunk)
+ insert(short_chunkset,chunk[1:16])
+ }
+ if /chunk | /lchunk
+ then stop("mtf: report file, ",s," seems mangled.")
+ }
+ }
+
+end
+
+
+
+procedure make_table_of_filenames(intext)
+
+ local header # chunkset is global
+
+ # search headers for overlong filenames; for now
+ # ignore everything else
+ while header := readtarhdr(reads(intext,512)) do {
+ # tab upto the next header block
+ tab_nxt_hdr(intext,trim_str(header.size),1)
+ # record overlong filenames in several global tables, sets
+ fixpath(trim_str(header.name))
+ }
+ *\chunkset ~= 0 | fail
+ return &null
+
+end
+
+
+
+procedure output_mapped_headers_and_texts(intext)
+
+ # Remember that filenametbl, chunkset, and short_chunkset
+ # (which are used by various procedures below) are global.
+ local header, newtext, full_block, block, lastblock
+
+ # Read in headers, one at a time.
+ while header := readtarhdr(reads(intext,512)) do {
+
+ # Replace overlong filenames with shorter ones, according to
+ # the conversions specified in the global hash table filenametbl
+ # (which were generated by fixpath() on the first pass).
+ header.name := left(map_filenams(header.name),100,"\x00")
+ header.linkname := left(map_filenams(header.linkname),100,"\x00")
+
+ # Use header.size field to determine the size of the subsequent text.
+ # Read in the text as one string. Map overlong filenames found in it
+ # to shorter names as specified in the global hash table filenamtbl.
+ newtext := map_filenams(tab_nxt_hdr(intext,trim_str(header.size)))
+
+ # Now, find the length of newtext, and insert it into the size field.
+ header.size := right(exbase10(*newtext,8) || " ",12," ")
+
+ # Calculate the checksum of the newly retouched header.
+ header.chksum := right(exbase10(get_checksum(header),8)||"\x00 ",8," ")
+
+ # Finally, join all the header fields into a new block and write it out
+ full_block := ""; every full_block ||:= !header
+ suspend left(full_block,512,"\x00")
+
+ # Now we're ready to write out the text, padding the final block
+ # out to an even 512 bytes if necessary; the next header must start
+ # right at the beginning of a 512-byte block.
+ newtext ? {
+ while block := move(512)
+ do suspend block
+ pos(0) & next
+ lastblock := left(tab(0),512,"\x00")
+ suspend lastblock
+ }
+ }
+ # Write out a final null-filled block. Some tar programs will write
+ # out 1024 nulls at the end. Dunno why.
+ return repl("\x00",512)
+
+end
+
+
+
+procedure trim_str(s)
+
+ # Knock out spaces, nulls from those crazy tar header
+ # block fields (some of which end in a space and a null,
+ # some just a space, and some just a null [anyone know
+ # why?]).
+ return s ? {
+ (tab(many(' ')) | &null) &
+ trim(tab(find("\x00")|0))
+ }
+
+end
+
+
+
+procedure tab_nxt_hdr(f,size_str,firstpass)
+
+ # Tab upto the next header block. Return the bypassed text
+ # as a string if not the first pass.
+
+ local hs, next_header_offset
+
+ hs := integer("8r" || size_str)
+ next_header_offset := (hs / 512) * 512
+ hs % 512 ~= 0 & next_header_offset +:= 512
+ if 0 = next_header_offset then return ""
+ else {
+ # if this is pass no. 1 don't bother returning a value; we're
+ # just collecting long filenames;
+ if \firstpass then {
+ seek(f,where(f)+next_header_offset)
+ return
+ }
+ else {
+ return reads(f,next_header_offset)[1:hs+1] |
+ stop("mtf: error reading in ",
+ string(next_header_offset)," bytes.")
+ }
+ }
+
+end
+
+
+
+procedure fixpath(s)
+ local s2, piece
+
+ # Fixpath is a misnomer of sorts, since it is used on
+ # the first pass only, and merely examines each filename
+ # in a path, using the procedure mappiece to record any
+ # overlong ones in the global table filenametbl and in
+ # the global sets chunkset and short_chunkset; no fixing
+ # is actually done here.
+
+ s2 := ""
+ s ? {
+ while piece := tab(find("/")+1)
+ do s2 ||:= mappiece(piece)
+ s2 ||:= mappiece(tab(0))
+ }
+ return s2
+
+end
+
+
+
+procedure mappiece(s)
+ local chunk, i, lchunk
+
+ # Check s (the name of a file or dir as recorded in the tar header
+ # being examined) to see if it is over 14 chars long. If so,
+ # generate a unique 14-char version of the name, and store
+ # both values in the global hashtable filenametbl. Also store
+ # the original (overlong) file name in chunkset. Store the
+ # first fifteen chars of the original file name in short_chunkset.
+ # Sorry about all of the tables and sets. It actually makes for
+ # a reasonably efficient program. Doing away with both sets,
+ # while possible, causes a tenfold drop in execution speed!
+
+ # global filenametbl, chunkset, short_chunkset, extensions
+ local j, ending
+
+ initial {
+ /filenametbl := table()
+ /chunkset := set()
+ /short_chunkset := set()
+ }
+
+ chunk := trim(s,'/')
+ if chunk ? (tab(find(".tar")+4), pos(0)) then {
+ write(&errout, "mtf: Sorry, I can't let you do this.\n",
+ " You've nested a tar archive within\n",
+ " another tar archive, which makes it\n",
+ " likely I'll f your filenames ubar.")
+ exit(2)
+ }
+ if *chunk > 14 then {
+ i := 0
+
+ if /filenametbl[chunk] then {
+ # if we have not seen this file, then...
+ repeat {
+ # ...find a new unique 14-character name for it;
+ # preserve important suffixes like ".Z," ".c," etc.
+ # First, check to see if the original filename (chunk)
+ # ends in an important extension...
+ if chunk ?
+ (tab(find(".")),
+ ending := move(1) || tab(match(!extensions)|any(&ascii)),
+ pos(0)
+ )
+ # ...If so, then leave the extension alone; mess with the
+ # middle part of the filename (e.g. file.with.extension.c ->
+ # file.with001.c).
+ then {
+ j := (15 - *ending - 3)
+ lchunk:= chunk[1:j] || right(string(i+:=1),3,"0") || ending
+ }
+ # If no important extension is present, then reformat the
+ # end of the file (e.g. too.long.file.name -> too.long.fi01).
+ else lchunk := chunk[1:13] || right(string(i+:=1),2,"0")
+
+ # If the resulting shorter file name has already been used...
+ if lchunk == !filenametbl
+ # ...then go back and find another (i.e. increment i & try
+ # again; else break from the repeat loop, and...
+ then next else break
+ }
+ # ...record both the old filename (chunk) and its new,
+ # mapped name (lchunk) in filenametbl. Also record the
+ # mapped names in chunkset and short_chunkset.
+ filenametbl[chunk] := lchunk
+ insert(chunkset,chunk)
+ insert(short_chunkset,chunk[1:16])
+ }
+ }
+
+ # If the filename is overlong, return lchunk (the shortened
+ # name), else return the original name (chunk). If the name,
+ # as passed to the current function, contained a trailing /
+ # (i.e. if s[-1]=="/"), then put the / back. This could be
+ # done more elegantly.
+ return (\lchunk | chunk) || ((s[-1] == "/") | "")
+
+end
+
+
+
+procedure readtarhdr(s)
+ local this_block
+
+ # Read the silly tar header into a record. Note that, as was
+ # complained about above, some of the fields end in a null, some
+ # in a space, and some in a space and a null. The procedure
+ # trim_str() may (and in fact often _is_) used to remove this
+ # extra garbage.
+
+ this_block := hblock()
+ s ? {
+ this_block.name := move(100) # <- to be looked at later
+ this_block.junk := move(8+8+8) # skip the permissions, uid, etc.
+ this_block.size := move(12) # <- to be looked at later
+ this_block.mtime := move(12)
+ this_block.chksum := move(8) # <- to be looked at later
+ this_block.linkflag := move(1)
+ this_block.linkname := move(100) # <- to be looked at later
+ this_block.therest := tab(0)
+ }
+ integer(this_block.size) | fail # If it's not an integer, we've hit
+ # the final (null-filled) block.
+ return this_block
+
+end
+
+
+
+procedure map_filenams(s)
+ local el, ch
+
+ # Chunkset is global, and contains all the overlong filenames
+ # found in the first pass through the input file; here the aim
+ # is to map these filenames to the shortened variants as stored
+ # in filenametbl (GLOBAL).
+
+ local s2, tmp_chunk_tbl, tmp_lst
+ static new_chunklist
+ initial {
+
+ # Make sure filenames are sorted, longest first. Say we
+ # have a file called long_file_name_here.1 and one called
+ # long_file_name_here.1a. We want to check for the longer
+ # one first. Otherwise the portion of the second file which
+ # matches the first file will get remapped.
+ tmp_chunk_tbl := table()
+ every el := !chunkset
+ do insert(tmp_chunk_tbl,el,*el)
+ tmp_lst := sort(tmp_chunk_tbl,4)
+ new_chunklist := list()
+ every put(new_chunklist,tmp_lst[*tmp_lst-1 to 1 by -2])
+
+ }
+
+ s2 := ""
+ s ? {
+ until pos(0) do {
+ # first narrow the possibilities, using short_chunkset
+ if member(short_chunkset,&subject[&pos:&pos+15])
+ # then try to map from a long to a shorter 14-char filename
+ then {
+ if match(ch := !new_chunklist) & not match(!no_nos)
+ then s2 ||:= filenametbl[=ch]
+ else s2 ||:= move(1)
+ }
+ else s2 ||:= move(1)
+ }
+ }
+ return s2
+
+end
+
+
+# From the IPL. Thanks, Ralph -
+# Author: Ralph E. Griswold
+# Date: June 10, 1988
+# exbase10(i,j) convert base-10 integer i to base j
+# The maximum base allowed is 36.
+
+procedure exbase10(i,j)
+
+ static digits
+ local s, d, sign
+ initial digits := &digits || &lcase
+ if i = 0 then return 0
+ if i < 0 then {
+ sign := "-"
+ i := -i
+ }
+ else sign := ""
+ s := ""
+ while i > 0 do {
+ d := i % j
+ if d > 9 then d := digits[d + 1]
+ s := d || s
+ i /:= j
+ }
+ return sign || s
+
+end
+
+# end IPL material
+
+
+procedure get_checksum(r)
+ local sum, field
+
+ # Calculates the new value of the checksum field for the
+ # current header block. Note that the specification say
+ # that, when calculating this value, the chksum field must
+ # be blank-filled.
+
+ sum := 0
+ r.chksum := " "
+ every field := !r
+ do every sum +:= ord(!field)
+ return sum
+
+end
+
+
+
+procedure write_report()
+
+ # This procedure writes out a list of filenames which were
+ # remapped (because they exceeded the SysV 14-char limit),
+ # and then notifies the user of the existence of this file.
+
+ local outtext, stbl, i, j, mapfile_name
+
+ # Get a unique name for the map.report (thereby preventing
+ # us from overwriting an older one).
+ mapfile_name := "map.report"; j := 1
+ until not close(open(mapfile_name,"r"))
+ do mapfile_name := (mapfile_name[1:11] || string(j+:=1))
+
+ (outtext := open(mapfile_name,"w")) |
+ open(mapfile_name := "/tmp/map.report","w") |
+ stop("mtf: Can't find a place to put map.report!")
+ stbl := sort(filenametbl,3)
+ every i := 1 to *stbl -1 by 2 do {
+ match(!no_nos,stbl[i]) |
+ write(outtext,left(stbl[i],35," ")," ",stbl[i+1])
+ }
+ write(&errout,"\nmtf: ",mapfile_name," contains the list of changes.")
+ write(&errout," Please save this list!")
+ close(outtext)
+ return &null
+
+end