Imported Upstream version 1.4upstream/1.4

author: Tianon Gravi <admwiggin@gmail.com> 2015-01-15 11:54:00 -0700
committer: Tianon Gravi <admwiggin@gmail.com> 2015-01-15 11:54:00 -0700
commit: f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch)
tree: f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/compress/bzip2/bzip2.go
parent: 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff)
download: golang-upstream/1.4.tar.gz
1 files changed, 0 insertions, 484 deletions
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go
deleted file mode 100644
index 82e30c7c9..000000000
--- a/src/pkg/compress/bzip2/bzip2.go
+++ /dev/null
@@ -1,484 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package bzip2 implements bzip2 decompression.
-package bzip2
-
-import "io"
-
-// There's no RFC for bzip2. I used the Wikipedia page for reference and a lot
-// of guessing: http://en.wikipedia.org/wiki/Bzip2
-// The source code to pyflate was useful for debugging:
-// http://www.paul.sladen.org/projects/pyflate
-
-// A StructuralError is returned when the bzip2 data is found to be
-// syntactically invalid.
-type StructuralError string
-
-func (s StructuralError) Error() string {
-	return "bzip2 data invalid: " + string(s)
-}
-
-// A reader decompresses bzip2 compressed data.
-type reader struct {
-	br           bitReader
-	fileCRC      uint32
-	blockCRC     uint32
-	wantBlockCRC uint32
-	setupDone    bool // true if we have parsed the bzip2 header.
-	blockSize    int  // blockSize in bytes, i.e. 900 * 1024.
-	eof          bool
-	buf          []byte    // stores Burrows-Wheeler transformed data.
-	c            [256]uint // the `C' array for the inverse BWT.
-	tt           []uint32  // mirrors the `tt' array in the bzip2 source and contains the P array in the upper 24 bits.
-	tPos         uint32    // Index of the next output byte in tt.
-
-	preRLE      []uint32 // contains the RLE data still to be processed.
-	preRLEUsed  int      // number of entries of preRLE used.
-	lastByte    int      // the last byte value seen.
-	byteRepeats uint     // the number of repeats of lastByte seen.
-	repeats     uint     // the number of copies of lastByte to output.
-}
-
-// NewReader returns an io.Reader which decompresses bzip2 data from r.
-func NewReader(r io.Reader) io.Reader {
-	bz2 := new(reader)
-	bz2.br = newBitReader(r)
-	return bz2
-}
-
-const bzip2FileMagic = 0x425a // "BZ"
-const bzip2BlockMagic = 0x314159265359
-const bzip2FinalMagic = 0x177245385090
-
-// setup parses the bzip2 header.
-func (bz2 *reader) setup(needMagic bool) error {
-	br := &bz2.br
-
-	if needMagic {
-		magic := br.ReadBits(16)
-		if magic != bzip2FileMagic {
-			return StructuralError("bad magic value")
-		}
-	}
-
-	t := br.ReadBits(8)
-	if t != 'h' {
-		return StructuralError("non-Huffman entropy encoding")
-	}
-
-	level := br.ReadBits(8)
-	if level < '1' || level > '9' {
-		return StructuralError("invalid compression level")
-	}
-
-	bz2.fileCRC = 0
-	bz2.blockSize = 100 * 1024 * (int(level) - '0')
-	if bz2.blockSize > len(bz2.tt) {
-		bz2.tt = make([]uint32, bz2.blockSize)
-	}
-	return nil
-}
-
-func (bz2 *reader) Read(buf []byte) (n int, err error) {
-	if bz2.eof {
-		return 0, io.EOF
-	}
-
-	if !bz2.setupDone {
-		err = bz2.setup(true)
-		brErr := bz2.br.Err()
-		if brErr != nil {
-			err = brErr
-		}
-		if err != nil {
-			return 0, err
-		}
-		bz2.setupDone = true
-	}
-
-	n, err = bz2.read(buf)
-	brErr := bz2.br.Err()
-	if brErr != nil {
-		err = brErr
-	}
-	return
-}
-
-func (bz2 *reader) readFromBlock(buf []byte) int {
-	// bzip2 is a block based compressor, except that it has a run-length
-	// preprocessing step. The block based nature means that we can
-	// preallocate fixed-size buffers and reuse them. However, the RLE
-	// preprocessing would require allocating huge buffers to store the
-	// maximum expansion. Thus we process blocks all at once, except for
-	// the RLE which we decompress as required.
-	n := 0
-	for (bz2.repeats > 0 || bz2.preRLEUsed < len(bz2.preRLE)) && n < len(buf) {
-		// We have RLE data pending.
-
-		// The run-length encoding works like this:
-		// Any sequence of four equal bytes is followed by a length
-		// byte which contains the number of repeats of that byte to
-		// include. (The number of repeats can be zero.) Because we are
-		// decompressing on-demand our state is kept in the reader
-		// object.
-
-		if bz2.repeats > 0 {
-			buf[n] = byte(bz2.lastByte)
-			n++
-			bz2.repeats--
-			if bz2.repeats == 0 {
-				bz2.lastByte = -1
-			}
-			continue
-		}
-
-		bz2.tPos = bz2.preRLE[bz2.tPos]
-		b := byte(bz2.tPos)
-		bz2.tPos >>= 8
-		bz2.preRLEUsed++
-
-		if bz2.byteRepeats == 3 {
-			bz2.repeats = uint(b)
-			bz2.byteRepeats = 0
-			continue
-		}
-
-		if bz2.lastByte == int(b) {
-			bz2.byteRepeats++
-		} else {
-			bz2.byteRepeats = 0
-		}
-		bz2.lastByte = int(b)
-
-		buf[n] = b
-		n++
-	}
-
-	return n
-}
-
-func (bz2 *reader) read(buf []byte) (int, error) {
-	for {
-		n := bz2.readFromBlock(buf)
-		if n > 0 {
-			bz2.blockCRC = updateCRC(bz2.blockCRC, buf[:n])
-			return n, nil
-		}
-
-		// End of block. Check CRC.
-		if bz2.blockCRC != bz2.wantBlockCRC {
-			bz2.br.err = StructuralError("block checksum mismatch")
-			return 0, bz2.br.err
-		}
-
-		// Find next block.
-		br := &bz2.br
-		switch br.ReadBits64(48) {
-		default:
-			return 0, StructuralError("bad magic value found")
-
-		case bzip2BlockMagic:
-			// Start of block.
-			err := bz2.readBlock()
-			if err != nil {
-				return 0, err
-			}
-
-		case bzip2FinalMagic:
-			// Check end-of-file CRC.
-			wantFileCRC := uint32(br.ReadBits64(32))
-			if br.err != nil {
-				return 0, br.err
-			}
-			if bz2.fileCRC != wantFileCRC {
-				br.err = StructuralError("file checksum mismatch")
-				return 0, br.err
-			}
-
-			// Skip ahead to byte boundary.
-			// Is there a file concatenated to this one?
-			// It would start with BZ.
-			if br.bits%8 != 0 {
-				br.ReadBits(br.bits % 8)
-			}
-			b, err := br.r.ReadByte()
-			if err == io.EOF {
-				br.err = io.EOF
-				bz2.eof = true
-				return 0, io.EOF
-			}
-			if err != nil {
-				br.err = err
-				return 0, err
-			}
-			z, err := br.r.ReadByte()
-			if err != nil {
-				if err == io.EOF {
-					err = io.ErrUnexpectedEOF
-				}
-				br.err = err
-				return 0, err
-			}
-			if b != 'B' || z != 'Z' {
-				return 0, StructuralError("bad magic value in continuation file")
-			}
-			if err := bz2.setup(false); err != nil {
-				return 0, err
-			}
-		}
-	}
-}
-
-// readBlock reads a bzip2 block. The magic number should already have been consumed.
-func (bz2 *reader) readBlock() (err error) {
-	br := &bz2.br
-	bz2.wantBlockCRC = uint32(br.ReadBits64(32)) // skip checksum. TODO: check it if we can figure out what it is.
-	bz2.blockCRC = 0
-	bz2.fileCRC = (bz2.fileCRC<<1 | bz2.fileCRC>>31) ^ bz2.wantBlockCRC
-	randomized := br.ReadBits(1)
-	if randomized != 0 {
-		return StructuralError("deprecated randomized files")
-	}
-	origPtr := uint(br.ReadBits(24))
-
-	// If not every byte value is used in the block (i.e., it's text) then
-	// the symbol set is reduced. The symbols used are stored as a
-	// two-level, 16x16 bitmap.
-	symbolRangeUsedBitmap := br.ReadBits(16)
-	symbolPresent := make([]bool, 256)
-	numSymbols := 0
-	for symRange := uint(0); symRange < 16; symRange++ {
-		if symbolRangeUsedBitmap&(1<<(15-symRange)) != 0 {
-			bits := br.ReadBits(16)
-			for symbol := uint(0); symbol < 16; symbol++ {
-				if bits&(1<<(15-symbol)) != 0 {
-					symbolPresent[16*symRange+symbol] = true
-					numSymbols++
-				}
-			}
-		}
-	}
-
-	// A block uses between two and six different Huffman trees.
-	numHuffmanTrees := br.ReadBits(3)
-	if numHuffmanTrees < 2 || numHuffmanTrees > 6 {
-		return StructuralError("invalid number of Huffman trees")
-	}
-
-	// The Huffman tree can switch every 50 symbols so there's a list of
-	// tree indexes telling us which tree to use for each 50 symbol block.
-	numSelectors := br.ReadBits(15)
-	treeIndexes := make([]uint8, numSelectors)
-
-	// The tree indexes are move-to-front transformed and stored as unary
-	// numbers.
-	mtfTreeDecoder := newMTFDecoderWithRange(numHuffmanTrees)
-	for i := range treeIndexes {
-		c := 0
-		for {
-			inc := br.ReadBits(1)
-			if inc == 0 {
-				break
-			}
-			c++
-		}
-		if c >= numHuffmanTrees {
-			return StructuralError("tree index too large")
-		}
-		treeIndexes[i] = uint8(mtfTreeDecoder.Decode(c))
-	}
-
-	// The list of symbols for the move-to-front transform is taken from
-	// the previously decoded symbol bitmap.
-	symbols := make([]byte, numSymbols)
-	nextSymbol := 0
-	for i := 0; i < 256; i++ {
-		if symbolPresent[i] {
-			symbols[nextSymbol] = byte(i)
-			nextSymbol++
-		}
-	}
-	mtf := newMTFDecoder(symbols)
-
-	numSymbols += 2 // to account for RUNA and RUNB symbols
-	huffmanTrees := make([]huffmanTree, numHuffmanTrees)
-
-	// Now we decode the arrays of code-lengths for each tree.
-	lengths := make([]uint8, numSymbols)
-	for i := 0; i < numHuffmanTrees; i++ {
-		// The code lengths are delta encoded from a 5-bit base value.
-		length := br.ReadBits(5)
-		for j := 0; j < numSymbols; j++ {
-			for {
-				if !br.ReadBit() {
-					break
-				}
-				if br.ReadBit() {
-					length--
-				} else {
-					length++
-				}
-			}
-			if length < 0 || length > 20 {
-				return StructuralError("Huffman length out of range")
-			}
-			lengths[j] = uint8(length)
-		}
-		huffmanTrees[i], err = newHuffmanTree(lengths)
-		if err != nil {
-			return err
-		}
-	}
-
-	selectorIndex := 1 // the next tree index to use
-	currentHuffmanTree := huffmanTrees[treeIndexes[0]]
-	bufIndex := 0 // indexes bz2.buf, the output buffer.
-	// The output of the move-to-front transform is run-length encoded and
-	// we merge the decoding into the Huffman parsing loop. These two
-	// variables accumulate the repeat count. See the Wikipedia page for
-	// details.
-	repeat := 0
-	repeat_power := 0
-
-	// The `C' array (used by the inverse BWT) needs to be zero initialized.
-	for i := range bz2.c {
-		bz2.c[i] = 0
-	}
-
-	decoded := 0 // counts the number of symbols decoded by the current tree.
-	for {
-		if decoded == 50 {
-			currentHuffmanTree = huffmanTrees[treeIndexes[selectorIndex]]
-			selectorIndex++
-			decoded = 0
-		}
-
-		v := currentHuffmanTree.Decode(br)
-		decoded++
-
-		if v < 2 {
-			// This is either the RUNA or RUNB symbol.
-			if repeat == 0 {
-				repeat_power = 1
-			}
-			repeat += repeat_power << v
-			repeat_power <<= 1
-
-			// This limit of 2 million comes from the bzip2 source
-			// code. It prevents repeat from overflowing.
-			if repeat > 2*1024*1024 {
-				return StructuralError("repeat count too large")
-			}
-			continue
-		}
-
-		if repeat > 0 {
-			// We have decoded a complete run-length so we need to
-			// replicate the last output symbol.
-			if repeat > bz2.blockSize-bufIndex {
-				return StructuralError("repeats past end of block")
-			}
-			for i := 0; i < repeat; i++ {
-				b := byte(mtf.First())
-				bz2.tt[bufIndex] = uint32(b)
-				bz2.c[b]++
-				bufIndex++
-			}
-			repeat = 0
-		}
-
-		if int(v) == numSymbols-1 {
-			// This is the EOF symbol. Because it's always at the
-			// end of the move-to-front list, and never gets moved
-			// to the front, it has this unique value.
-			break
-		}
-
-		// Since two metasymbols (RUNA and RUNB) have values 0 and 1,
-		// one would expect |v-2| to be passed to the MTF decoder.
-		// However, the front of the MTF list is never referenced as 0,
-		// it's always referenced with a run-length of 1. Thus 0
-		// doesn't need to be encoded and we have |v-1| in the next
-		// line.
-		b := byte(mtf.Decode(int(v - 1)))
-		if bufIndex >= bz2.blockSize {
-			return StructuralError("data exceeds block size")
-		}
-		bz2.tt[bufIndex] = uint32(b)
-		bz2.c[b]++
-		bufIndex++
-	}
-
-	if origPtr >= uint(bufIndex) {
-		return StructuralError("origPtr out of bounds")
-	}
-
-	// We have completed the entropy decoding. Now we can perform the
-	// inverse BWT and setup the RLE buffer.
-	bz2.preRLE = bz2.tt[:bufIndex]
-	bz2.preRLEUsed = 0
-	bz2.tPos = inverseBWT(bz2.preRLE, origPtr, bz2.c[:])
-	bz2.lastByte = -1
-	bz2.byteRepeats = 0
-	bz2.repeats = 0
-
-	return nil
-}
-
-// inverseBWT implements the inverse Burrows-Wheeler transform as described in
-// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf, section 4.2.
-// In that document, origPtr is called `I' and c is the `C' array after the
-// first pass over the data. It's an argument here because we merge the first
-// pass with the Huffman decoding.
-//
-// This also implements the `single array' method from the bzip2 source code
-// which leaves the output, still shuffled, in the bottom 8 bits of tt with the
-// index of the next byte in the top 24-bits. The index of the first byte is
-// returned.
-func inverseBWT(tt []uint32, origPtr uint, c []uint) uint32 {
-	sum := uint(0)
-	for i := 0; i < 256; i++ {
-		sum += c[i]
-		c[i] = sum - c[i]
-	}
-
-	for i := range tt {
-		b := tt[i] & 0xff
-		tt[c[b]] |= uint32(i) << 8
-		c[b]++
-	}
-
-	return tt[origPtr] >> 8
-}
-
-// This is a standard CRC32 like in hash/crc32 except that all the shifts are reversed,
-// causing the bits in the input to be processed in the reverse of the usual order.
-
-var crctab [256]uint32
-
-func init() {
-	const poly = 0x04C11DB7
-	for i := range crctab {
-		crc := uint32(i) << 24
-		for j := 0; j < 8; j++ {
-			if crc&0x80000000 != 0 {
-				crc = (crc << 1) ^ poly
-			} else {
-				crc <<= 1
-			}
-		}
-		crctab[i] = crc
-	}
-}
-
-// updateCRC updates the crc value to incorporate the data in b.
-// The initial value is 0.
-func updateCRC(val uint32, b []byte) uint32 {
-	crc := ^val
-	for _, v := range b {
-		crc = crctab[byte(crc>>24)^v] ^ (crc << 8)
-	}
-	return ^crc
-}
author	Tianon Gravi <admwiggin@gmail.com>	2015-01-15 11:54:00 -0700
committer	Tianon Gravi <admwiggin@gmail.com>	2015-01-15 11:54:00 -0700
commit	f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch)
tree	f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/compress/bzip2/bzip2.go
parent	8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff)
download	golang-upstream/1.4.tar.gz