diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/compress/bzip2/huffman.go | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/pkg/compress/bzip2/huffman.go')
-rw-r--r-- | src/pkg/compress/bzip2/huffman.go | 251 |
1 files changed, 0 insertions, 251 deletions
diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go deleted file mode 100644 index 75a6223d8..000000000 --- a/src/pkg/compress/bzip2/huffman.go +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bzip2 - -import "sort" - -// A huffmanTree is a binary tree which is navigated, bit-by-bit to reach a -// symbol. -type huffmanTree struct { - // nodes contains all the non-leaf nodes in the tree. nodes[0] is the - // root of the tree and nextNode contains the index of the next element - // of nodes to use when the tree is being constructed. - nodes []huffmanNode - nextNode int -} - -// A huffmanNode is a node in the tree. left and right contain indexes into the -// nodes slice of the tree. If left or right is invalidNodeValue then the child -// is a left node and its value is in leftValue/rightValue. -// -// The symbols are uint16s because bzip2 encodes not only MTF indexes in the -// tree, but also two magic values for run-length encoding and an EOF symbol. -// Thus there are more than 256 possible symbols. -type huffmanNode struct { - left, right uint16 - leftValue, rightValue uint16 -} - -// invalidNodeValue is an invalid index which marks a leaf node in the tree. -const invalidNodeValue = 0xffff - -// Decode reads bits from the given bitReader and navigates the tree until a -// symbol is found. -func (t *huffmanTree) Decode(br *bitReader) (v uint16) { - nodeIndex := uint16(0) // node 0 is the root of the tree. - - for { - node := &t.nodes[nodeIndex] - bit, ok := br.TryReadBit() - if !ok && br.ReadBit() { - bit = 1 - } - // bzip2 encodes left as a true bit. - if bit != 0 { - // left - if node.left == invalidNodeValue { - return node.leftValue - } - nodeIndex = node.left - } else { - // right - if node.right == invalidNodeValue { - return node.rightValue - } - nodeIndex = node.right - } - } -} - -// newHuffmanTree builds a Huffman tree from a slice containing the code -// lengths of each symbol. The maximum code length is 32 bits. -func newHuffmanTree(lengths []uint8) (huffmanTree, error) { - // There are many possible trees that assign the same code length to - // each symbol (consider reflecting a tree down the middle, for - // example). Since the code length assignments determine the - // efficiency of the tree, each of these trees is equally good. In - // order to minimize the amount of information needed to build a tree - // bzip2 uses a canonical tree so that it can be reconstructed given - // only the code length assignments. - - if len(lengths) < 2 { - panic("newHuffmanTree: too few symbols") - } - - var t huffmanTree - - // First we sort the code length assignments by ascending code length, - // using the symbol value to break ties. - pairs := huffmanSymbolLengthPairs(make([]huffmanSymbolLengthPair, len(lengths))) - for i, length := range lengths { - pairs[i].value = uint16(i) - pairs[i].length = length - } - - sort.Sort(pairs) - - // Now we assign codes to the symbols, starting with the longest code. - // We keep the codes packed into a uint32, at the most-significant end. - // So branches are taken from the MSB downwards. This makes it easy to - // sort them later. - code := uint32(0) - length := uint8(32) - - codes := huffmanCodes(make([]huffmanCode, len(lengths))) - for i := len(pairs) - 1; i >= 0; i-- { - if length > pairs[i].length { - // If the code length decreases we shift in order to - // zero any bits beyond the end of the code. - length >>= 32 - pairs[i].length - length <<= 32 - pairs[i].length - length = pairs[i].length - } - codes[i].code = code - codes[i].codeLen = length - codes[i].value = pairs[i].value - // We need to 'increment' the code, which means treating |code| - // like a |length| bit number. - code += 1 << (32 - length) - } - - // Now we can sort by the code so that the left half of each branch are - // grouped together, recursively. - sort.Sort(codes) - - t.nodes = make([]huffmanNode, len(codes)) - _, err := buildHuffmanNode(&t, codes, 0) - return t, err -} - -// huffmanSymbolLengthPair contains a symbol and its code length. -type huffmanSymbolLengthPair struct { - value uint16 - length uint8 -} - -// huffmanSymbolLengthPair is used to provide an interface for sorting. -type huffmanSymbolLengthPairs []huffmanSymbolLengthPair - -func (h huffmanSymbolLengthPairs) Len() int { - return len(h) -} - -func (h huffmanSymbolLengthPairs) Less(i, j int) bool { - if h[i].length < h[j].length { - return true - } - if h[i].length > h[j].length { - return false - } - if h[i].value < h[j].value { - return true - } - return false -} - -func (h huffmanSymbolLengthPairs) Swap(i, j int) { - h[i], h[j] = h[j], h[i] -} - -// huffmanCode contains a symbol, its code and code length. -type huffmanCode struct { - code uint32 - codeLen uint8 - value uint16 -} - -// huffmanCodes is used to provide an interface for sorting. -type huffmanCodes []huffmanCode - -func (n huffmanCodes) Len() int { - return len(n) -} - -func (n huffmanCodes) Less(i, j int) bool { - return n[i].code < n[j].code -} - -func (n huffmanCodes) Swap(i, j int) { - n[i], n[j] = n[j], n[i] -} - -// buildHuffmanNode takes a slice of sorted huffmanCodes and builds a node in -// the Huffman tree at the given level. It returns the index of the newly -// constructed node. -func buildHuffmanNode(t *huffmanTree, codes []huffmanCode, level uint32) (nodeIndex uint16, err error) { - test := uint32(1) << (31 - level) - - // We have to search the list of codes to find the divide between the left and right sides. - firstRightIndex := len(codes) - for i, code := range codes { - if code.code&test != 0 { - firstRightIndex = i - break - } - } - - left := codes[:firstRightIndex] - right := codes[firstRightIndex:] - - if len(left) == 0 || len(right) == 0 { - // There is a superfluous level in the Huffman tree indicating - // a bug in the encoder. However, this bug has been observed in - // the wild so we handle it. - - // If this function was called recursively then we know that - // len(codes) >= 2 because, otherwise, we would have hit the - // "leaf node" case, below, and not recursed. - // - // However, for the initial call it's possible that len(codes) - // is zero or one. Both cases are invalid because a zero length - // tree cannot encode anything and a length-1 tree can only - // encode EOF and so is superfluous. We reject both. - if len(codes) < 2 { - return 0, StructuralError("empty Huffman tree") - } - - // In this case the recursion doesn't always reduce the length - // of codes so we need to ensure termination via another - // mechanism. - if level == 31 { - // Since len(codes) >= 2 the only way that the values - // can match at all 32 bits is if they are equal, which - // is invalid. This ensures that we never enter - // infinite recursion. - return 0, StructuralError("equal symbols in Huffman tree") - } - - if len(left) == 0 { - return buildHuffmanNode(t, right, level+1) - } - return buildHuffmanNode(t, left, level+1) - } - - nodeIndex = uint16(t.nextNode) - node := &t.nodes[t.nextNode] - t.nextNode++ - - if len(left) == 1 { - // leaf node - node.left = invalidNodeValue - node.leftValue = left[0].value - } else { - node.left, err = buildHuffmanNode(t, left, level+1) - } - - if err != nil { - return - } - - if len(right) == 1 { - // leaf node - node.right = invalidNodeValue - node.rightValue = right[0].value - } else { - node.right, err = buildHuffmanNode(t, right, level+1) - } - - return -} |