Diffstat (limited to 'src/pkg/compress')
-rw-r--r--  src/pkg/compress/bzip2/bzip2.go               |   4
-rw-r--r--  src/pkg/compress/bzip2/huffman.go             |   2
-rw-r--r--  src/pkg/compress/flate/deflate.go             | 478
-rw-r--r--  src/pkg/compress/flate/deflate_test.go        |  15
-rw-r--r--  src/pkg/compress/flate/huffman_bit_writer.go  |  94
-rw-r--r--  src/pkg/compress/flate/huffman_code.go        |   7
-rw-r--r--  src/pkg/compress/flate/inflate.go             | 249
-rw-r--r--  src/pkg/compress/gzip/gzip_test.go            |   2
-rw-r--r--  src/pkg/compress/lzw/reader.go                | 225
-rw-r--r--  src/pkg/compress/lzw/writer_test.go           |   8
-rw-r--r--  src/pkg/compress/zlib/writer.go               |   2
-rw-r--r--  src/pkg/compress/zlib/writer_test.go          |  95
12 files changed, 612 insertions, 569 deletions
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go index 9e97edec1..8b4572306 100644 --- a/src/pkg/compress/bzip2/bzip2.go +++ b/src/pkg/compress/bzip2/bzip2.go @@ -284,7 +284,7 @@ func (bz2 *reader) readBlock() (err os.Error) { repeat := 0 repeat_power := 0 - // The `C' array (used by the inverse BWT) needs to be zero initialised. + // The `C' array (used by the inverse BWT) needs to be zero initialized. for i := range bz2.c { bz2.c[i] = 0 } @@ -330,7 +330,7 @@ func (bz2 *reader) readBlock() (err os.Error) { if int(v) == numSymbols-1 { // This is the EOF symbol. Because it's always at the - // end of the move-to-front list, and nevers gets moved + // end of the move-to-front list, and never gets moved // to the front, it has this unique value. break } diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go index 732bc4a21..dc05739c7 100644 --- a/src/pkg/compress/bzip2/huffman.go +++ b/src/pkg/compress/bzip2/huffman.go @@ -68,7 +68,7 @@ func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) { // each symbol (consider reflecting a tree down the middle, for // example). Since the code length assignments determine the // efficiency of the tree, each of these trees is equally good. In - // order to minimise the amount of information needed to build a tree + // order to minimize the amount of information needed to build a tree // bzip2 uses a canonical tree so that it can be reconstructed given // only the code length assignments. diff --git a/src/pkg/compress/flate/deflate.go b/src/pkg/compress/flate/deflate.go index a02a5e8d9..b1cee0b2f 100644 --- a/src/pkg/compress/flate/deflate.go +++ b/src/pkg/compress/flate/deflate.go @@ -11,16 +11,18 @@ import ( ) const ( - NoCompression = 0 - BestSpeed = 1 - fastCompression = 3 - BestCompression = 9 - DefaultCompression = -1 - logMaxOffsetSize = 15 // Standard DEFLATE - wideLogMaxOffsetSize = 22 // Wide DEFLATE - minMatchLength = 3 // The smallest match that the compressor looks for - maxMatchLength = 258 // The longest match for the compressor - minOffsetSize = 1 // The shortest offset that makes any sence + NoCompression = 0 + BestSpeed = 1 + fastCompression = 3 + BestCompression = 9 + DefaultCompression = -1 + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 3 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sence // The maximum number of tokens we put into a single flat block, just too // stop things from getting too large. 
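The new constants replace the per-instance logWindowSize/windowMask fields: the window is fixed at 1<<15 bytes and windowMask exists for cheap modular indexing into the circular hash tables. A minimal standalone sketch of the identity this relies on (not from the patch; it only holds because windowSize is a power of two):

package main

import "fmt"

const (
	logWindowSize = 15
	windowSize    = 1 << logWindowSize
	windowMask    = windowSize - 1
)

func main() {
	// Because windowSize is a power of two, i&windowMask == i%windowSize,
	// so ring-buffer positions like hashPrev[i&windowMask] avoid a divide.
	for _, i := range []int{0, windowSize - 1, windowSize, 3*windowSize + 7} {
		fmt.Println(i&windowMask, i%windowSize) // the two columns always match
	}
}
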
@@ -32,22 +34,6 @@ const ( hashShift = (hashBits + minMatchLength - 1) / minMatchLength ) -type syncPipeReader struct { - *io.PipeReader - closeChan chan bool -} - -func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error { - retErr := sr.PipeReader.CloseWithError(err) - sr.closeChan <- true // finish writer close - return retErr -} - -type syncPipeWriter struct { - *io.PipeWriter - closeChan chan bool -} - type compressionLevel struct { good, lazy, nice, chain, fastSkipHashing int } @@ -68,105 +54,73 @@ var levels = []compressionLevel{ {32, 258, 258, 4096, math.MaxInt32}, } -func (sw *syncPipeWriter) Close() os.Error { - err := sw.PipeWriter.Close() - <-sw.closeChan // wait for reader close - return err -} - -func syncPipe() (*syncPipeReader, *syncPipeWriter) { - r, w := io.Pipe() - sr := &syncPipeReader{r, make(chan bool, 1)} - sw := &syncPipeWriter{w, sr.closeChan} - return sr, sw -} - type compressor struct { - level int - logWindowSize uint - w *huffmanBitWriter - r io.Reader - // (1 << logWindowSize) - 1. - windowMask int + compressionLevel - eof bool // has eof been reached on input? - sync bool // writer wants to flush - syncChan chan os.Error + w *huffmanBitWriter - // hashHead[hashValue] contains the largest inputIndex with the specified hash value - hashHead []int + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value // If hashHead[hashValue] is within the current window, then // hashPrev[hashHead[hashValue] & windowMask] contains the previous index // with the same hash value. - hashPrev []int - - // If we find a match of length >= niceMatch, then we don't bother searching - // any further. - niceMatch int - - // If we find a match of length >= goodMatch, we only do a half-hearted - // effort at doing lazy matching starting at the next character - goodMatch int - - // The maximum number of chains we look at when finding a match - maxChainLength int - - // The sliding window we use for matching - window []byte - - // The index just past the last valid character - windowEnd int - - // index in "window" at which current block starts - blockStart int -} - -func (d *compressor) flush() os.Error { - d.w.flush() - return d.w.err + chainHead int + hashHead []int + hashPrev []int + + // input window: unprocessed data is window[index:windowEnd] + index int + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. 
+ + // queued output tokens: tokens[:ti] + tokens []token + ti int + + // deflate state + length int + offset int + hash int + maxInsertIndex int + err os.Error } -func (d *compressor) fillWindow(index int) (int, os.Error) { - if d.sync { - return index, nil - } - wSize := d.windowMask + 1 - if index >= wSize+wSize-(minMatchLength+maxMatchLength) { - // shift the window by wSize - copy(d.window, d.window[wSize:2*wSize]) - index -= wSize - d.windowEnd -= wSize - if d.blockStart >= wSize { - d.blockStart -= wSize +func (d *compressor) fillDeflate(b []byte) int { + if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window, d.window[windowSize:2*windowSize]) + d.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize } else { d.blockStart = math.MaxInt32 } for i, h := range d.hashHead { - v := h - wSize + v := h - windowSize if v < -1 { v = -1 } d.hashHead[i] = v } for i, h := range d.hashPrev { - v := -h - wSize + v := -h - windowSize if v < -1 { v = -1 } d.hashPrev[i] = v } } - count, err := d.r.Read(d.window[d.windowEnd:]) - d.windowEnd += count - if count == 0 && err == nil { - d.sync = true - } - if err == os.EOF { - d.eof = true - err = nil - } - return index, err + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n } func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error { @@ -194,21 +148,21 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // We quit when we get a match that's at least nice long nice := len(win) - pos - if d.niceMatch < nice { - nice = d.niceMatch + if d.nice < nice { + nice = d.nice } // If we've got a match that's good enough, only look in 1/4 the chain. - tries := d.maxChainLength + tries := d.chain length = prevLength - if length >= d.goodMatch { + if length >= d.good { tries >>= 2 } w0 := win[pos] w1 := win[pos+1] wEnd := win[pos+length] - minIndex := pos - (d.windowMask + 1) + minIndex := pos - windowSize for i := prevHead; tries > 0; tries-- { if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { @@ -233,7 +187,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // hashPrev[i & windowMask] has already been overwritten, so stop now. 
break } - if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 { + if i = d.hashPrev[i&windowMask]; i < minIndex || i < 0 { break } } @@ -248,234 +202,224 @@ func (d *compressor) writeStoredBlock(buf []byte) os.Error { return d.w.err } -func (d *compressor) storedDeflate() os.Error { - buf := make([]byte, maxStoreBlockSize) - for { - n, err := d.r.Read(buf) - if n == 0 && err == nil { - d.sync = true - } - if n > 0 || d.sync { - if err := d.writeStoredBlock(buf[0:n]); err != nil { - return err - } - if d.sync { - d.syncChan <- nil - d.sync = false - } - } - if err != nil { - if err == os.EOF { - break - } - return err - } - } - return nil -} - -func (d *compressor) doDeflate() (err os.Error) { - // init - d.windowMask = 1<<d.logWindowSize - 1 +func (d *compressor) initDeflate() { d.hashHead = make([]int, hashSize) - d.hashPrev = make([]int, 1<<d.logWindowSize) - d.window = make([]byte, 2<<d.logWindowSize) + d.hashPrev = make([]int, windowSize) + d.window = make([]byte, 2*windowSize) fillInts(d.hashHead, -1) - tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) - l := levels[d.level] - d.goodMatch = l.good - d.niceMatch = l.nice - d.maxChainLength = l.chain - lazyMatch := l.lazy - length := minMatchLength - 1 - offset := 0 - byteAvailable := false - isFastDeflate := l.fastSkipHashing != 0 - index := 0 - // run - if index, err = d.fillWindow(index); err != nil { + d.tokens = make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) + d.length = minMatchLength - 1 + d.offset = 0 + d.byteAvailable = false + d.index = 0 + d.ti = 0 + d.hash = 0 + d.chainHead = -1 +} + +func (d *compressor) deflate() { + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { return } - maxOffset := d.windowMask + 1 // (1 << logWindowSize); - // only need to change when you refill the window - windowEnd := d.windowEnd - maxInsertIndex := windowEnd - (minMatchLength - 1) - ti := 0 - - hash := int(0) - if index < maxInsertIndex { - hash = int(d.window[index])<<hashShift + int(d.window[index+1]) + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) } - chainHead := -1 + Loop: for { - if index > windowEnd { + if d.index > d.windowEnd { panic("index > windowEnd") } - lookahead := windowEnd - index + lookahead := d.windowEnd - d.index if lookahead < minMatchLength+maxMatchLength { - if index, err = d.fillWindow(index); err != nil { - return + if !d.sync { + break Loop } - windowEnd = d.windowEnd - if index > windowEnd { + if d.index > d.windowEnd { panic("index > windowEnd") } - maxInsertIndex = windowEnd - (minMatchLength - 1) - lookahead = windowEnd - index if lookahead == 0 { // Flush current output block if any. - if byteAvailable { + if d.byteAvailable { // There is still one pending token that needs to be flushed - tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF) - ti++ - byteAvailable = false + d.tokens[d.ti] = literalToken(uint32(d.window[d.index-1])) + d.ti++ + d.byteAvailable = false } - if ti > 0 { - if err = d.writeBlock(tokens[0:ti], index, false); err != nil { + if d.ti > 0 { + if d.err = d.writeBlock(d.tokens[0:d.ti], d.index, false); d.err != nil { return } - ti = 0 - } - if d.sync { - d.w.writeStoredHeader(0, false) - d.w.flush() - d.syncChan <- d.w.err - d.sync = false - } - - // If this was only a sync (not at EOF) keep going. 
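For orientation, here is a hypothetical distillation (not in the patch) of the chain walk findMatch performs over these tables: hashHead[h] holds the newest window position whose three-byte prefix hashed to h, and hashPrev[i&windowMask] links back to the next older position with the same hash, so candidates are visited newest-first. The real code additionally compares bytes at each candidate and quits early on a nice-enough match.

package main

import "fmt"

const windowMask = 1<<15 - 1

// walkChain visits match candidates newest-first. The walk stops at the
// window edge (minIndex), at a -1 "no predecessor" link, or when the
// per-level chain budget (tries) is spent.
func walkChain(hashHead, hashPrev []int, h, minIndex, tries int, visit func(pos int)) {
	for i := hashHead[h]; tries > 0 && i >= 0 && i >= minIndex; tries-- {
		visit(i)
		i = hashPrev[i&windowMask]
	}
}

func main() {
	hashHead := make([]int, 8)
	for i := range hashHead {
		hashHead[i] = -1 // empty chains, as fillInts(d.hashHead, -1) does
	}
	hashHead[2] = 9 // hash value 2 was last seen at window position 9
	hashPrev := make([]int, windowMask+1)
	hashPrev[9], hashPrev[5] = 5, -1 // chain: 9 -> 5 -> end
	walkChain(hashHead, hashPrev, 2, 0, 8, func(pos int) { fmt.Println(pos) })
}
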
- if !d.eof { - continue + d.ti = 0 } break Loop } } - if index < maxInsertIndex { + if d.index < d.maxInsertIndex { // Update the hash - hash = (hash<<hashShift + int(d.window[index+2])) & hashMask - chainHead = d.hashHead[hash] - d.hashPrev[index&d.windowMask] = chainHead - d.hashHead[hash] = index + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + d.chainHead = d.hashHead[d.hash] + d.hashPrev[d.index&windowMask] = d.chainHead + d.hashHead[d.hash] = d.index } - prevLength := length - prevOffset := offset - length = minMatchLength - 1 - offset = 0 - minIndex := index - maxOffset + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize if minIndex < 0 { minIndex = 0 } - if chainHead >= minIndex && - (isFastDeflate && lookahead > minMatchLength-1 || - !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) { - if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok { - length = newLength - offset = newOffset + if d.chainHead >= minIndex && + (d.fastSkipHashing != 0 && lookahead > minMatchLength-1 || + d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset } } - if isFastDeflate && length >= minMatchLength || - !isFastDeflate && prevLength >= minMatchLength && length <= prevLength { + if d.fastSkipHashing != 0 && d.length >= minMatchLength || + d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength { // There was a match at the previous step, and the current match is // not better. Output the previous match. - if isFastDeflate { - tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize)) + if d.fastSkipHashing != 0 { + d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)) } else { - tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) + d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) } - ti++ + d.ti++ // Insert in the hash table all strings up to the end of the match. // index and index-1 are already inserted. If there is not enough // lookahead, the last two strings are not inserted into the hash // table. - if length <= l.fastSkipHashing { + if d.length <= d.fastSkipHashing { var newIndex int - if isFastDeflate { - newIndex = index + length + if d.fastSkipHashing != 0 { + newIndex = d.index + d.length } else { newIndex = prevLength - 1 } - for index++; index < newIndex; index++ { - if index < maxInsertIndex { - hash = (hash<<hashShift + int(d.window[index+2])) & hashMask + for d.index++; d.index < newIndex; d.index++ { + if d.index < d.maxInsertIndex { + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask // Get previous value with the same hash. // Our chain should point to the previous value. - d.hashPrev[index&d.windowMask] = d.hashHead[hash] + d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] // Set the head of the hash chain to us. - d.hashHead[hash] = index + d.hashHead[d.hash] = d.index } } - if !isFastDeflate { - byteAvailable = false - length = minMatchLength - 1 + if d.fastSkipHashing == 0 { + d.byteAvailable = false + d.length = minMatchLength - 1 } } else { // For matches this long, we don't bother inserting each individual // item into the table. 
- index += length - hash = (int(d.window[index])<<hashShift + int(d.window[index+1])) + d.index += d.length + d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) } - if ti == maxFlateBlockTokens { + if d.ti == maxFlateBlockTokens { // The block includes the current character - if err = d.writeBlock(tokens, index, false); err != nil { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { return } - ti = 0 + d.ti = 0 } } else { - if isFastDeflate || byteAvailable { - i := index - 1 - if isFastDeflate { - i = index + if d.fastSkipHashing != 0 || d.byteAvailable { + i := d.index - 1 + if d.fastSkipHashing != 0 { + i = d.index } - tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF) - ti++ - if ti == maxFlateBlockTokens { - if err = d.writeBlock(tokens, i+1, false); err != nil { + d.tokens[d.ti] = literalToken(uint32(d.window[i])) + d.ti++ + if d.ti == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { return } - ti = 0 + d.ti = 0 } } - index++ - if !isFastDeflate { - byteAvailable = true + d.index++ + if d.fastSkipHashing == 0 { + d.byteAvailable = true } } } - return } -func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) { - d.r = r +func (d *compressor) fillStore(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) store() { + if d.windowEnd > 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } + d.windowEnd = 0 +} + +func (d *compressor) write(b []byte) (n int, err os.Error) { + n = len(b) + b = b[d.fill(d, b):] + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + } + return n, d.err +} + +func (d *compressor) syncFlush() os.Error { + d.sync = true + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err os.Error) { d.w = newHuffmanBitWriter(w) - d.level = level - d.logWindowSize = logWindowSize switch { case level == NoCompression: - err = d.storedDeflate() + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).store case level == DefaultCompression: - d.level = 6 + level = 6 fallthrough case 1 <= level && level <= 9: - err = d.doDeflate() + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflate default: return WrongValueError{"level", 0, 9, int32(level)} } + return nil +} - if d.sync { - d.syncChan <- err - d.sync = false - } - if err != nil { - return err +func (d *compressor) close() os.Error { + d.sync = true + d.step(d) + if d.err != nil { + return d.err } if d.w.writeStoredHeader(0, true); d.w.err != nil { return d.w.err } - return d.flush() + d.w.flush() + return d.w.err } // NewWriter returns a new Writer compressing @@ -486,14 +430,9 @@ func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize // compression; it only adds the necessary DEFLATE framing. 
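The net effect of this hunk is that the compressor becomes an ordinary synchronous state machine: init installs a fill function (copy caller data into the window) and a step function (consume the window) as method expressions, and write, syncFlush, and close just drive them. A minimal sketch of that dispatch pattern, with hypothetical names:

package main

import "fmt"

type machine struct {
	window    []byte
	windowEnd int
	fill      func(*machine, []byte) int // copy input into the window
	step      func(*machine)             // consume the window
}

func (m *machine) fillStore(b []byte) int {
	n := copy(m.window[m.windowEnd:], b)
	m.windowEnd += n
	return n
}

func (m *machine) store() {
	fmt.Printf("flush %q\n", m.window[:m.windowEnd])
	m.windowEnd = 0
}

func (m *machine) write(b []byte) int {
	n := len(b)
	// Keep filling and stepping until the caller's data is consumed;
	// a final partial window stays buffered until close, as in deflate.
	for b = b[m.fill(m, b):]; len(b) > 0; b = b[m.fill(m, b):] {
		m.step(m)
	}
	return n
}

func main() {
	m := &machine{window: make([]byte, 4)}
	m.fill = (*machine).fillStore // method expressions as a tiny vtable
	m.step = (*machine).store
	m.write([]byte("hello world"))
	m.step(m) // close would perform this final flush
}

With the goroutine and pipe gone, Flush is a plain method call: syncFlush compresses whatever is buffered and then emits an empty stored block via writeStoredHeader(0, false), the usual DEFLATE sync marker. A round trip in the pre-Go 1 API this tree defines (NewWriter here returns only a *Writer):

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
)

func main() {
	var buf bytes.Buffer
	w := flate.NewWriter(&buf, flate.BestSpeed)
	w.Write([]byte("hello, "))
	w.Flush() // synchronous: pending data, then an empty stored block
	w.Write([]byte("flate"))
	w.Close()

	r := flate.NewReader(&buf)
	defer r.Close()
	out, _ := ioutil.ReadAll(r)
	fmt.Printf("%s\n", out) // hello, flate
}
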
func NewWriter(w io.Writer, level int) *Writer { const logWindowSize = logMaxOffsetSize - var d compressor - d.syncChan = make(chan os.Error, 1) - pr, pw := syncPipe() - go func() { - err := d.compress(pr, w, level, logWindowSize) - pr.CloseWithError(err) - }() - return &Writer{pw, &d} + var dw Writer + dw.d.init(w, level) + return &dw } // NewWriterDict is like NewWriter but initializes the new @@ -526,18 +465,13 @@ func (w *dictWriter) Write(b []byte) (n int, err os.Error) { // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { - w *syncPipeWriter - d *compressor + d compressor } // Write writes data to w, which will eventually write the // compressed form of data to its underlying writer. func (w *Writer) Write(data []byte) (n int, err os.Error) { - if len(data) == 0 { - // no point, and nil interferes with sync - return - } - return w.w.Write(data) + return w.d.write(data) } // Flush flushes any pending compressed data to the underlying writer. @@ -550,18 +484,10 @@ func (w *Writer) Write(data []byte) (n int, err os.Error) { func (w *Writer) Flush() os.Error { // For more about flushing: // http://www.bolet.org/~pornin/deflate-flush.html - if w.d.sync { - panic("compress/flate: double Flush") - } - _, err := w.w.Write(nil) - err1 := <-w.d.syncChan - if err == nil { - err = err1 - } - return err + return w.d.syncFlush() } // Close flushes and closes the writer. func (w *Writer) Close() os.Error { - return w.w.Close() + return w.d.close() } diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go index 650a8059a..2ac811c38 100644 --- a/src/pkg/compress/flate/deflate_test.go +++ b/src/pkg/compress/flate/deflate_test.go @@ -57,7 +57,7 @@ var deflateInflateTests = []*deflateInflateTest{ &deflateInflateTest{[]byte{0x11, 0x12}}, &deflateInflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, &deflateInflateTest{[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, - &deflateInflateTest{getLargeDataChunk()}, + &deflateInflateTest{largeDataChunk()}, } var reverseBitsTests = []*reverseBitsTest{ @@ -71,23 +71,22 @@ var reverseBitsTests = []*reverseBitsTest{ &reverseBitsTest{29, 5, 23}, } -func getLargeDataChunk() []byte { +func largeDataChunk() []byte { result := make([]byte, 100000) for i := range result { - result[i] = byte(int64(i) * int64(i) & 0xFF) + result[i] = byte(i * i & 0xFF) } return result } func TestDeflate(t *testing.T) { for _, h := range deflateTests { - buffer := bytes.NewBuffer(nil) - w := NewWriter(buffer, h.level) + var buf bytes.Buffer + w := NewWriter(&buf, h.level) w.Write(h.in) w.Close() - if bytes.Compare(buffer.Bytes(), h.out) != 0 { - t.Errorf("buffer is wrong; level = %v, buffer.Bytes() = %v, expected output = %v", - h.level, buffer.Bytes(), h.out) + if !bytes.Equal(buf.Bytes(), h.out) { + t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out) } } } diff --git a/src/pkg/compress/flate/huffman_bit_writer.go b/src/pkg/compress/flate/huffman_bit_writer.go index abff82dd6..3981df5cb 100644 --- a/src/pkg/compress/flate/huffman_bit_writer.go +++ b/src/pkg/compress/flate/huffman_bit_writer.go @@ -15,9 +15,6 @@ const ( // The largest offset code. offsetCodeCount = 30 - // The largest offset code in the extensions. - extendedOffsetCodeCount = 42 - // The special code used to mark the end of a block. 
endBlockMarker = 256 @@ -100,11 +97,11 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { return &huffmanBitWriter{ w: w, literalFreq: make([]int32, maxLit), - offsetFreq: make([]int32, extendedOffsetCodeCount), - codegen: make([]uint8, maxLit+extendedOffsetCodeCount+1), + offsetFreq: make([]int32, offsetCodeCount), + codegen: make([]uint8, maxLit+offsetCodeCount+1), codegenFreq: make([]int32, codegenCodeCount), literalEncoding: newHuffmanEncoder(maxLit), - offsetEncoding: newHuffmanEncoder(extendedOffsetCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), codegenEncoding: newHuffmanEncoder(codegenCodeCount), } } @@ -185,7 +182,7 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) { _, w.err = w.w.Write(bytes) } -// RFC 1951 3.2.7 specifies a special run-length encoding for specifiying +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying // the literal and offset lengths arrays (which are concatenated into a single // array). This method generates that run-length encoding. // @@ -279,7 +276,7 @@ func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) { // // numLiterals The number of literals specified in codegen // numOffsets The number of offsets specified in codegen -// numCodegens Tne number of codegens used in codegen +// numCodegens The number of codegens used in codegen func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { if w.err != nil { return @@ -290,13 +287,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n } w.writeBits(firstBits, 3) w.writeBits(int32(numLiterals-257), 5) - if numOffsets > offsetCodeCount { - // Extended version of decompressor - w.writeBits(int32(offsetCodeCount+((numOffsets-(1+offsetCodeCount))>>3)), 5) - w.writeBits(int32((numOffsets-(1+offsetCodeCount))&0x7), 3) - } else { - w.writeBits(int32(numOffsets-1), 5) - } + w.writeBits(int32(numOffsets-1), 5) w.writeBits(int32(numCodegens-4), 4) for i := 0; i < numCodegens; i++ { @@ -368,24 +359,17 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { tokens = tokens[0 : n+1] tokens[n] = endBlockMarker - totalLength := -1 // Subtract 1 for endBlock. for _, t := range tokens { switch t.typ() { case literalType: w.literalFreq[t.literal()]++ - totalLength++ - break case matchType: length := t.length() offset := t.offset() - totalLength += int(length + 3) w.literalFreq[lengthCodesStart+lengthCode(length)]++ w.offsetFreq[offsetCode(offset)]++ - break } } - w.literalEncoding.generate(w.literalFreq, 15) - w.offsetEncoding.generate(w.offsetFreq, 15) // get the number of literals numLiterals := len(w.literalFreq) @@ -394,15 +378,25 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { } // get the number of offsets numOffsets := len(w.offsetFreq) - for numOffsets > 1 && w.offsetFreq[numOffsets-1] == 0 { + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { numOffsets-- } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. 
+ w.offsetFreq[0] = 1 + numOffsets = 1 + } + + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + storedBytes := 0 if input != nil { storedBytes = len(input) } var extraBits int64 - var storedSize int64 + var storedSize int64 = math.MaxInt64 if storedBytes <= maxStoreBlockSize && input != nil { storedSize = int64((storedBytes + 5) * 8) // We only bother calculating the costs of the extra bits required by @@ -417,34 +411,29 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { // First four offset codes have extra size = 0. extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) } - } else { - storedSize = math.MaxInt32 } - // Figure out which generates smaller code, fixed Huffman, dynamic - // Huffman, or just storing the data. - var fixedSize int64 = math.MaxInt64 - if numOffsets <= offsetCodeCount { - fixedSize = int64(3) + - fixedLiteralEncoding.bitLength(w.literalFreq) + - fixedOffsetEncoding.bitLength(w.offsetFreq) + - extraBits - } + // Figure out smallest code. + // Fixed Huffman baseline. + var size = int64(3) + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + + // Dynamic Huffman? + var numCodegens int + // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets) w.codegenEncoding.generate(w.codegenFreq, 7) - numCodegens := len(w.codegenFreq) + numCodegens = len(w.codegenFreq) for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { numCodegens-- } - extensionSummand := 0 - if numOffsets > offsetCodeCount { - extensionSummand = 3 - } dynamicHeader := int64(3+5+5+4+(3*numCodegens)) + - // Following line is an extension. - int64(extensionSummand) + w.codegenEncoding.bitLength(w.codegenFreq) + int64(extraBits) + int64(w.codegenFreq[16]*2) + @@ -454,26 +443,25 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { w.literalEncoding.bitLength(w.literalFreq) + w.offsetEncoding.bitLength(w.offsetFreq) - if storedSize < fixedSize && storedSize < dynamicSize { + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storedSize < size { w.writeStoredHeader(storedBytes, eof) w.writeBytes(input[0:storedBytes]) return } - var literalEncoding *huffmanEncoder - var offsetEncoding *huffmanEncoder - if fixedSize <= dynamicSize { + // Huffman. + if literalEncoding == fixedLiteralEncoding { w.writeFixedHeader(eof) - literalEncoding = fixedLiteralEncoding - offsetEncoding = fixedOffsetEncoding } else { - // Write the header. w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - literalEncoding = w.literalEncoding - offsetEncoding = w.offsetEncoding } - - // Write the tokens. 
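The rewritten comparison folds the three candidates into one running minimum: fixed Huffman is the baseline, dynamic Huffman must strictly beat it, and a stored block must strictly beat whichever Huffman form won (storedSize starts at math.MaxInt64, so an unstorable block can never win). A standalone distillation of that decision order, with hypothetical names:

package main

import "fmt"

// chooseEncoding mirrors the decision order in writeBlock above.
func chooseEncoding(storedSize, fixedSize, dynamicSize int64) string {
	size, enc := fixedSize, "fixed"
	if dynamicSize < size {
		size, enc = dynamicSize, "dynamic"
	}
	if storedSize < size {
		enc = "stored"
	}
	return enc
}

func main() {
	fmt.Println(chooseEncoding(800, 1000, 900))  // stored
	fmt.Println(chooseEncoding(5000, 1000, 900)) // dynamic
}
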
for _, t := range tokens { switch t.typ() { case literalType: diff --git a/src/pkg/compress/flate/huffman_code.go b/src/pkg/compress/flate/huffman_code.go index 6be605f0a..7ed603a4f 100644 --- a/src/pkg/compress/flate/huffman_code.go +++ b/src/pkg/compress/flate/huffman_code.go @@ -363,7 +363,12 @@ func (s literalNodeSorter) Less(i, j int) bool { func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } func sortByFreq(a []literalNode) { - s := &literalNodeSorter{a, func(i, j int) bool { return a[i].freq < a[j].freq }} + s := &literalNodeSorter{a, func(i, j int) bool { + if a[i].freq == a[j].freq { + return a[i].literal < a[j].literal + } + return a[i].freq < a[j].freq + }} sort.Sort(s) } diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go index 320b80d06..3845f1204 100644 --- a/src/pkg/compress/flate/inflate.go +++ b/src/pkg/compress/flate/inflate.go @@ -77,8 +77,6 @@ type huffmanDecoder struct { // Initialize Huffman decoding tables from array of code lengths. func (h *huffmanDecoder) init(bits []int) bool { - // TODO(rsc): Return false sometimes. - // Count number of codes of each length, // compute min and max length. var count [maxCodeLen + 1]int @@ -197,9 +195,8 @@ type Reader interface { // Decompress state. type decompressor struct { - // Input/output sources. + // Input source. r Reader - w io.Writer roffset int64 woffset int64 @@ -222,38 +219,79 @@ type decompressor struct { // Temporary buffer (avoids repeated allocation). buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + final bool + err os.Error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int } -func (f *decompressor) inflate() (err os.Error) { - final := false - for err == nil && !final { - for f.nb < 1+2 { - if err = f.moreBits(); err != nil { - return - } +func (f *decompressor) nextBlock() { + if f.final { + if f.hw != f.hp { + f.flush((*decompressor).nextBlock) + return } - final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - err = f.dataBlock() - case 1: - // compressed, fixed Huffman tables - err = f.decodeBlock(&fixedHuffmanDecoder, nil) - case 2: - // compressed, dynamic Huffman tables - if err = f.readHuffman(); err == nil { - err = f.decodeBlock(&f.h1, &f.h2) - } - default: - // 3 is reserved. - err = CorruptInputError(f.roffset) + f.err = os.EOF + return + } + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, os.Error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + return n, nil + } + if f.err != nil { + return 0, f.err } + f.step(f) } - return + panic("unreachable") +} + +func (f *decompressor) Close() os.Error { + if f.err == os.EOF { + return nil + } + return f.err } // RFC 1951 section 3.2.7. @@ -358,11 +396,12 @@ func (f *decompressor) readHuffman() os.Error { // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. 
If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { +func (f *decompressor) huffmanBlock() { for { - v, err := f.huffSym(hl) + v, err := f.huffSym(f.hl) if err != nil { - return err + f.err = err + return } var n uint // number of bits extra var length int @@ -371,13 +410,15 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { f.hist[f.hp] = byte(v) f.hp++ if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + // After the flush, continue this loop. + f.flush((*decompressor).huffmanBlock) + return } continue case v == 256: - return nil + // Done with huffman block; read next block. + f.step = (*decompressor).nextBlock + return // otherwise, reference to older data case v < 265: length = v - (257 - 3) @@ -404,7 +445,8 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { if n > 0 { for f.nb < n { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } length += int(f.b & uint32(1<<n-1)) @@ -413,18 +455,20 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { } var dist int - if hd == nil { + if f.hd == nil { for f.nb < 5 { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } dist = int(reverseByte[(f.b&0x1F)<<3]) f.b >>= 5 f.nb -= 5 } else { - if dist, err = f.huffSym(hd); err != nil { - return err + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return } } @@ -432,14 +476,16 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { case dist < 4: dist++ case dist >= 30: - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return default: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << nb for f.nb < nb { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } extra |= int(f.b & uint32(1<<nb-1)) @@ -450,12 +496,14 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { // Copy history[-dist:-dist+length] into output. if dist > len(f.hist) { - return InternalError("bad history distance") + f.err = InternalError("bad history distance") + return } // No check on length; encoding can be prescient. if !f.hfull && dist > f.hp { - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return } p := f.hp - dist @@ -467,9 +515,11 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { f.hp++ p++ if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + // After flush continue copying out of history. + f.copyLen = length - (i + 1) + f.copyDist = dist + f.flush((*decompressor).copyHuff) + return } if p == len(f.hist) { p = 0 @@ -479,8 +529,33 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { panic("unreached") } +func (f *decompressor) copyHuff() { + length := f.copyLen + dist := f.copyDist + p := f.hp - dist + if p < 0 { + p += len(f.hist) + } + for i := 0; i < length; i++ { + f.hist[f.hp] = f.hist[p] + f.hp++ + p++ + if f.hp == len(f.hist) { + f.copyLen = length - (i + 1) + f.flush((*decompressor).copyHuff) + return + } + if p == len(f.hist) { + p = 0 + } + } + + // Continue processing Huffman block. + f.huffmanBlock() +} + // Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() os.Error { +func (f *decompressor) dataBlock() { // Uncompressed. // Discard current half-byte. 
f.nb = 0 @@ -490,21 +565,30 @@ func (f *decompressor) dataBlock() os.Error { nr, err := io.ReadFull(f.r, f.buf[0:4]) f.roffset += int64(nr) if err != nil { - return &ReadError{f.roffset, err} + f.err = &ReadError{f.roffset, err} + return } n := int(f.buf[0]) | int(f.buf[1])<<8 nn := int(f.buf[2]) | int(f.buf[3])<<8 if uint16(nn) != uint16(^n) { - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return } if n == 0 { // 0-length block means sync - return f.flush() + f.flush((*decompressor).nextBlock) + return } - // Read len bytes into history, - // writing as history fills. + f.copyLen = n + f.copyData() +} + +func (f *decompressor) copyData() { + // Read f.dataLen bytes into history, + // pausing for reads as history fills. + n := f.copyLen for n > 0 { m := len(f.hist) - f.hp if m > n { @@ -513,17 +597,18 @@ func (f *decompressor) dataBlock() os.Error { m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) f.roffset += int64(m) if err != nil { - return &ReadError{f.roffset, err} + f.err = &ReadError{f.roffset, err} + return } n -= m f.hp += m if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + f.copyLen = n + f.flush((*decompressor).copyData) + return } } - return nil + f.step = (*decompressor).nextBlock } func (f *decompressor) setDict(dict []byte) { @@ -579,17 +664,8 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) { } // Flush any buffered output to the underlying writer. -func (f *decompressor) flush() os.Error { - if f.hw == f.hp { - return nil - } - n, err := f.w.Write(f.hist[f.hw:f.hp]) - if n != f.hp-f.hw && err == nil { - err = io.ErrShortWrite - } - if err != nil { - return &WriteError{f.woffset, err} - } +func (f *decompressor) flush(step func(*decompressor)) { + f.toRead = f.hist[f.hw:f.hp] f.woffset += int64(f.hp - f.hw) f.hw = f.hp if f.hp == len(f.hist) { @@ -597,7 +673,7 @@ func (f *decompressor) flush() os.Error { f.hw = 0 f.hfull = true } - return nil + f.step = step } func makeReader(r io.Reader) Reader { @@ -607,30 +683,15 @@ func makeReader(r io.Reader) Reader { return bufio.NewReader(r) } -// decompress reads DEFLATE-compressed data from r and writes -// the uncompressed data to w. -func (f *decompressor) decompress(r io.Reader, w io.Writer) os.Error { - f.r = makeReader(r) - f.w = w - f.woffset = 0 - if err := f.inflate(); err != nil { - return err - } - if err := f.flush(); err != nil { - return err - } - return nil -} - // NewReader returns a new ReadCloser that can be used // to read the uncompressed version of r. It is the caller's // responsibility to call Close on the ReadCloser when // finished reading. 
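Note that flush no longer writes to a pipe: it parks the window bytes in toRead and records which function to resume, and Read drives step until output appears or the error becomes sticky. A standalone sketch of that continuation pattern (hypothetical names, pre-Go 1 error types):

package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

type dec struct {
	step   func(*dec) // continuation: what to run next
	toRead []byte     // output parked for Read to return
	err    os.Error   // sticky error; os.EOF ends the stream
	n      int
}

func (d *dec) produce() {
	if d.n == 3 {
		d.err = os.EOF
		return
	}
	d.n++
	// Park some output and name the continuation, as flush(step) does.
	d.toRead = []byte(fmt.Sprintf("block %d\n", d.n))
	d.step = (*dec).produce
}

func (d *dec) Read(b []byte) (int, os.Error) {
	for {
		if len(d.toRead) > 0 {
			n := copy(b, d.toRead)
			d.toRead = d.toRead[n:]
			return n, nil
		}
		if d.err != nil {
			return 0, d.err
		}
		d.step(d)
	}
	panic("unreachable")
}

func main() {
	d := &dec{step: (*dec).produce}
	out, _ := ioutil.ReadAll(d) // *dec is an io.Reader now
	fmt.Print(string(out))
}
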
func NewReader(r io.Reader) io.ReadCloser { var f decompressor - pr, pw := io.Pipe() - go func() { pw.CloseWithError(f.decompress(r, pw)) }() - return pr + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f } // NewReaderDict is like NewReader but initializes the reader @@ -641,7 +702,7 @@ func NewReader(r io.Reader) io.ReadCloser { func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { var f decompressor f.setDict(dict) - pr, pw := io.Pipe() - go func() { pw.CloseWithError(f.decompress(r, pw)) }() - return pr + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f } diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go index 23f351405..121e627e6 100644 --- a/src/pkg/compress/gzip/gzip_test.go +++ b/src/pkg/compress/gzip/gzip_test.go @@ -11,7 +11,7 @@ import ( ) // pipe creates two ends of a pipe that gzip and gunzip, and runs dfunc at the -// writer end and ifunc at the reader end. +// writer end and cfunc at the reader end. func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) { piper, pipew := io.Pipe() defer piper.Close() diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go index d418bc856..21231c8e5 100644 --- a/src/pkg/compress/lzw/reader.go +++ b/src/pkg/compress/lzw/reader.go @@ -32,13 +32,49 @@ const ( MSB ) +const ( + maxWidth = 12 + decoderInvalidCode = 0xffff + flushBuffer = 1 << maxWidth +) + // decoder is the state from which the readXxx method converts a byte // stream into a code stream. type decoder struct { - r io.ByteReader - bits uint32 - nBits uint - width uint + r io.ByteReader + bits uint32 + nBits uint + width uint + read func(*decoder) (uint16, os.Error) // readLSB or readMSB + litWidth int // width in bits of literal codes + err os.Error + + // The first 1<<litWidth codes are literal codes. + // The next two codes mean clear and EOF. + // Other valid codes are in the range [lo, hi] where lo := clear + 2, + // with the upper bound incrementing on each code seen. + // overflow is the code at which hi overflows the code width. + // last is the most recently seen code, or decoderInvalidCode. + clear, eof, hi, overflow, last uint16 + + // Each code c in [lo, hi] expands to two or more bytes. For c != hi: + // suffix[c] is the last of these bytes. + // prefix[c] is the code for all but the last byte. + // This code can either be a literal code or another code in [lo, c). + // The c == hi case is a special case. + suffix [1 << maxWidth]uint8 + prefix [1 << maxWidth]uint16 + + // output is the temporary output buffer. + // Literal codes are accumulated from the start of the buffer. + // Non-literal codes decode to a sequence of suffixes that are first + // written right-to-left from the end of the buffer before being copied + // to the start of the buffer. + // It is flushed when it contains >= 1<<maxWidth bytes, + // so that there is always room to decode an entire code. + output [2 * 1 << maxWidth]byte + o int // write index into output + toRead []byte // bytes to return from Read } // readLSB returns the next code for "Least Significant Bits first" data. @@ -73,116 +109,113 @@ func (d *decoder) readMSB() (uint16, os.Error) { return code, nil } -// decode decompresses bytes from r and writes them to pw. -// read specifies how to decode bytes into codes. -// litWidth is the width in bits of literal codes. 
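The rewritten LZW decoder likewise satisfies Read directly, accumulating expansions in its output buffer instead of writing to a pipe; usage is unchanged. A round trip in the style of this tree (lzw.NewWriter's signature is assumed from the same era of the package, since this diff does not show it):

package main

import (
	"bytes"
	"compress/lzw"
	"fmt"
	"io/ioutil"
)

func main() {
	var buf bytes.Buffer

	// LSB bit order with 8-bit literals is the GIF-style configuration.
	w := lzw.NewWriter(&buf, lzw.LSB, 8)
	w.Write([]byte("TOBEORNOTTOBEORTOBEORNOT"))
	w.Close()

	r := lzw.NewReader(&buf, lzw.LSB, 8)
	defer r.Close()
	out, err := ioutil.ReadAll(r)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%s\n", out)
}
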
-func decode(r io.Reader, read func(*decoder) (uint16, os.Error), litWidth int, pw *io.PipeWriter) { - br, ok := r.(io.ByteReader) - if !ok { - br = bufio.NewReader(r) +func (d *decoder) Read(b []byte) (int, os.Error) { + for { + if len(d.toRead) > 0 { + n := copy(b, d.toRead) + d.toRead = d.toRead[n:] + return n, nil + } + if d.err != nil { + return 0, d.err + } + d.decode() } - pw.CloseWithError(decode1(pw, br, read, uint(litWidth))) + panic("unreachable") } -func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os.Error), litWidth uint) os.Error { - const ( - maxWidth = 12 - invalidCode = 0xffff - ) - d := decoder{r, 0, 0, 1 + litWidth} - w := bufio.NewWriter(pw) - // The first 1<<litWidth codes are literal codes. - // The next two codes mean clear and EOF. - // Other valid codes are in the range [lo, hi] where lo := clear + 2, - // with the upper bound incrementing on each code seen. - clear := uint16(1) << litWidth - eof, hi := clear+1, clear+1 - // overflow is the code at which hi overflows the code width. - overflow := uint16(1) << d.width - var ( - // Each code c in [lo, hi] expands to two or more bytes. For c != hi: - // suffix[c] is the last of these bytes. - // prefix[c] is the code for all but the last byte. - // This code can either be a literal code or another code in [lo, c). - // The c == hi case is a special case. - suffix [1 << maxWidth]uint8 - prefix [1 << maxWidth]uint16 - // buf is a scratch buffer for reconstituting the bytes that a code expands to. - // Code suffixes are written right-to-left from the end of the buffer. - buf [1 << maxWidth]byte - ) - +// decode decompresses bytes from r and leaves them in d.toRead. +// read specifies how to decode bytes into codes. +// litWidth is the width in bits of literal codes. +func (d *decoder) decode() { // Loop over the code stream, converting codes into decompressed bytes. - last := uint16(invalidCode) for { - code, err := read(&d) + code, err := d.read(d) if err != nil { if err == os.EOF { err = io.ErrUnexpectedEOF } - return err + d.err = err + return } switch { - case code < clear: + case code < d.clear: // We have a literal code. - if err := w.WriteByte(uint8(code)); err != nil { - return err - } - if last != invalidCode { + d.output[d.o] = uint8(code) + d.o++ + if d.last != decoderInvalidCode { // Save what the hi code expands to. - suffix[hi] = uint8(code) - prefix[hi] = last + d.suffix[d.hi] = uint8(code) + d.prefix[d.hi] = d.last } - case code == clear: - d.width = 1 + litWidth - hi = eof - overflow = 1 << d.width - last = invalidCode + case code == d.clear: + d.width = 1 + uint(d.litWidth) + d.hi = d.eof + d.overflow = 1 << d.width + d.last = decoderInvalidCode continue - case code == eof: - return w.Flush() - case code <= hi: - c, i := code, len(buf)-1 - if code == hi { + case code == d.eof: + d.flush() + d.err = os.EOF + return + case code <= d.hi: + c, i := code, len(d.output)-1 + if code == d.hi { // code == hi is a special case which expands to the last expansion // followed by the head of the last expansion. To find the head, we walk // the prefix chain until we find a literal code. - c = last - for c >= clear { - c = prefix[c] + c = d.last + for c >= d.clear { + c = d.prefix[c] } - buf[i] = uint8(c) + d.output[i] = uint8(c) i-- - c = last + c = d.last } - // Copy the suffix chain into buf and then write that to w. - for c >= clear { - buf[i] = suffix[c] + // Copy the suffix chain into output and then write that to w. 
+ for c >= d.clear { + d.output[i] = d.suffix[c] i-- - c = prefix[c] + c = d.prefix[c] } - buf[i] = uint8(c) - if _, err := w.Write(buf[i:]); err != nil { - return err + d.output[i] = uint8(c) + d.o += copy(d.output[d.o:], d.output[i:]) + if d.last != decoderInvalidCode { + // Save what the hi code expands to. + d.suffix[d.hi] = uint8(c) + d.prefix[d.hi] = d.last } - // Save what the hi code expands to. - suffix[hi] = uint8(c) - prefix[hi] = last default: - return os.NewError("lzw: invalid code") + d.err = os.NewError("lzw: invalid code") + return } - last, hi = code, hi+1 - if hi == overflow { + d.last, d.hi = code, d.hi+1 + if d.hi >= d.overflow { if d.width == maxWidth { - return os.NewError("lzw: missing clear code") + d.last = decoderInvalidCode + } else { + d.width++ + d.overflow <<= 1 } - d.width++ - overflow <<= 1 + } + if d.o >= flushBuffer { + d.flush() + return } } panic("unreachable") } +func (d *decoder) flush() { + d.toRead = d.output[:d.o] + d.o = 0 +} + +func (d *decoder) Close() os.Error { + d.err = os.EINVAL // in case any Reads come along + return nil +} + // NewReader creates a new io.ReadCloser that satisfies reads by decompressing // the data read from r. // It is the caller's responsibility to call Close on the ReadCloser when @@ -190,21 +223,31 @@ func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os // The number of bits to use for literal codes, litWidth, must be in the // range [2,8] and is typically 8. func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser { - pr, pw := io.Pipe() - var read func(*decoder) (uint16, os.Error) + d := new(decoder) switch order { case LSB: - read = (*decoder).readLSB + d.read = (*decoder).readLSB case MSB: - read = (*decoder).readMSB + d.read = (*decoder).readMSB default: - pw.CloseWithError(os.NewError("lzw: unknown order")) - return pr + d.err = os.NewError("lzw: unknown order") + return d } if litWidth < 2 || 8 < litWidth { - pw.CloseWithError(fmt.Errorf("lzw: litWidth %d out of range", litWidth)) - return pr + d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth) + return d } - go decode(r, read, litWidth, pw) - return pr + if br, ok := r.(io.ByteReader); ok { + d.r = br + } else { + d.r = bufio.NewReader(r) + } + d.litWidth = litWidth + d.width = 1 + uint(litWidth) + d.clear = uint16(1) << uint(litWidth) + d.eof, d.hi = d.clear+1, d.clear+1 + d.overflow = uint16(1) << d.width + d.last = decoderInvalidCode + + return d } diff --git a/src/pkg/compress/lzw/writer_test.go b/src/pkg/compress/lzw/writer_test.go index 82464ecd1..4c5e522f9 100644 --- a/src/pkg/compress/lzw/writer_test.go +++ b/src/pkg/compress/lzw/writer_test.go @@ -77,13 +77,13 @@ func testFile(t *testing.T, fn string, order Order, litWidth int) { t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1) return } - if len(b0) != len(b1) { - t.Errorf("%s (order=%d litWidth=%d): length mismatch %d versus %d", fn, order, litWidth, len(b0), len(b1)) + if len(b1) != len(b0) { + t.Errorf("%s (order=%d litWidth=%d): length mismatch %d != %d", fn, order, litWidth, len(b1), len(b0)) return } for i := 0; i < len(b0); i++ { - if b0[i] != b1[i] { - t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x versus 0x%02x\n", fn, order, litWidth, i, b0[i], b1[i]) + if b1[i] != b0[i] { + t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x != 0x%02x\n", fn, order, litWidth, i, b1[i], b0[i]) return } } diff --git a/src/pkg/compress/zlib/writer.go b/src/pkg/compress/zlib/writer.go index f1f9b2853..8f86e9c4c 100644 --- 
a/src/pkg/compress/zlib/writer.go +++ b/src/pkg/compress/zlib/writer.go @@ -89,7 +89,7 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) { } } z.w = w - z.compressor = flate.NewWriter(w, level) + z.compressor = flate.NewWriterDict(w, level, dict) z.digest = adler32.New() return z, nil } diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go index f94f28470..32f05ab68 100644 --- a/src/pkg/compress/zlib/writer_test.go +++ b/src/pkg/compress/zlib/writer_test.go @@ -5,6 +5,8 @@ package zlib import ( + "bytes" + "fmt" "io" "io/ioutil" "os" @@ -16,15 +18,13 @@ var filenames = []string{ "../testdata/pi.txt", } +var data = []string{ + "test a reasonable sized string that can be compressed", +} + // Tests that compressing and then decompressing the given file at the given compression level and dictionary // yields equivalent bytes to the original file. func testFileLevelDict(t *testing.T, fn string, level int, d string) { - // Read dictionary, if given. - var dict []byte - if d != "" { - dict = []byte(d) - } - // Read the file, as golden output. golden, err := os.Open(fn) if err != nil { @@ -32,17 +32,25 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { return } defer golden.Close() - - // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end. - raw, err := os.Open(fn) - if err != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + b0, err0 := ioutil.ReadAll(golden) + if err0 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) return } + testLevelDict(t, fn, b0, level, d) +} + +func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) { + // Make dictionary, if given. + var dict []byte + if d != "" { + dict = []byte(d) + } + + // Push data through a pipe that compresses at the write end, and decompresses at the read end. piper, pipew := io.Pipe() defer piper.Close() go func() { - defer raw.Close() defer pipew.Close() zlibw, err := NewWriterDict(pipew, level, dict) if err != nil { @@ -50,25 +58,14 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { return } defer zlibw.Close() - var b [1024]byte - for { - n, err0 := raw.Read(b[0:]) - if err0 != nil && err0 != os.EOF { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) - return - } - _, err1 := zlibw.Write(b[0:n]) - if err1 == os.EPIPE { - // Fail, but do not report the error, as some other (presumably reportable) error broke the pipe. - return - } - if err1 != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) - return - } - if err0 == os.EOF { - break - } + _, err = zlibw.Write(b0) + if err == os.EPIPE { + // Fail, but do not report the error, as some other (presumably reported) error broke the pipe. + return + } + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return } }() zlibr, err := NewReaderDict(piper, dict) @@ -78,13 +75,8 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { } defer zlibr.Close() - // Compare the two. - b0, err0 := ioutil.ReadAll(golden) + // Compare the decompressed data. 
b1, err1 := ioutil.ReadAll(zlibr) - if err0 != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) - return - } if err1 != nil { t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) return @@ -102,6 +94,18 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { } func TestWriter(t *testing.T) { + for i, s := range data { + b := []byte(s) + tag := fmt.Sprintf("#%d", i) + testLevelDict(t, tag, b, DefaultCompression, "") + testLevelDict(t, tag, b, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testLevelDict(t, tag, b, level, "") + } + } +} + +func TestWriterBig(t *testing.T) { for _, fn := range filenames { testFileLevelDict(t, fn, DefaultCompression, "") testFileLevelDict(t, fn, NoCompression, "") @@ -121,3 +125,20 @@ func TestWriterDict(t *testing.T) { } } } + +func TestWriterDictIsUsed(t *testing.T) { + var input = []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") + buf := bytes.NewBuffer(nil) + compressor, err := NewWriterDict(buf, BestCompression, input) + if err != nil { + t.Errorf("error in NewWriterDict: %s", err) + return + } + compressor.Write(input) + compressor.Close() + const expectedMaxSize = 25 + output := buf.Bytes() + if len(output) > expectedMaxSize { + t.Errorf("result too large (got %d, want <= %d bytes). Is the dictionary being used?", len(output), expectedMaxSize) + } +} |
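TestWriterDictIsUsed only asserts on compressed size; the effect is easy to see directly. Priming both ends with the same preset dictionary lets even the first occurrence of dictionary text be emitted as back-references, which is also why NewWriterDict must hand the dictionary through to flate.NewWriterDict (the one-line writer.go fix above). A sketch using the NewWriterDict/NewReaderDict APIs exercised in these tests (compression level spelled as the literal 9, i.e. best compression, to stay within what this diff shows):

package main

import (
	"bytes"
	"compress/zlib"
	"fmt"
	"io/ioutil"
)

func main() {
	dict := []byte("Lorem ipsum dolor sit amet")
	var buf bytes.Buffer

	zw, err := zlib.NewWriterDict(&buf, 9, dict)
	if err != nil {
		fmt.Println(err)
		return
	}
	zw.Write(dict) // all of this is in the dictionary, so it compresses to almost nothing
	zw.Close()
	fmt.Println("compressed size:", buf.Len())

	// The reader must be given the same dictionary to decode the stream.
	zr, err := zlib.NewReaderDict(&buf, dict)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer zr.Close()
	out, _ := ioutil.ReadAll(zr)
	fmt.Printf("%s\n", out)
}
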