Diffstat (limited to 'src/pkg/compress')
-rw-r--r--  src/pkg/compress/bzip2/bzip2.go               |   4
-rw-r--r--  src/pkg/compress/bzip2/huffman.go             |   2
-rw-r--r--  src/pkg/compress/flate/deflate.go             | 478
-rw-r--r--  src/pkg/compress/flate/deflate_test.go        |  15
-rw-r--r--  src/pkg/compress/flate/huffman_bit_writer.go  |  94
-rw-r--r--  src/pkg/compress/flate/huffman_code.go        |   7
-rw-r--r--  src/pkg/compress/flate/inflate.go             | 249
-rw-r--r--  src/pkg/compress/gzip/gzip_test.go            |   2
-rw-r--r--  src/pkg/compress/lzw/reader.go                | 225
-rw-r--r--  src/pkg/compress/lzw/writer_test.go           |   8
-rw-r--r--  src/pkg/compress/zlib/writer.go               |   2
-rw-r--r--  src/pkg/compress/zlib/writer_test.go          |  95
12 files changed, 612 insertions, 569 deletions
diff --git a/src/pkg/compress/bzip2/bzip2.go b/src/pkg/compress/bzip2/bzip2.go index 9e97edec1..8b4572306 100644 --- a/src/pkg/compress/bzip2/bzip2.go +++ b/src/pkg/compress/bzip2/bzip2.go @@ -284,7 +284,7 @@ func (bz2 *reader) readBlock() (err os.Error) { repeat := 0 repeat_power := 0 - // The `C' array (used by the inverse BWT) needs to be zero initialised. + // The `C' array (used by the inverse BWT) needs to be zero initialized. for i := range bz2.c { bz2.c[i] = 0 } @@ -330,7 +330,7 @@ func (bz2 *reader) readBlock() (err os.Error) { if int(v) == numSymbols-1 { // This is the EOF symbol. Because it's always at the - // end of the move-to-front list, and nevers gets moved + // end of the move-to-front list, and never gets moved // to the front, it has this unique value. break } diff --git a/src/pkg/compress/bzip2/huffman.go b/src/pkg/compress/bzip2/huffman.go index 732bc4a21..dc05739c7 100644 --- a/src/pkg/compress/bzip2/huffman.go +++ b/src/pkg/compress/bzip2/huffman.go @@ -68,7 +68,7 @@ func newHuffmanTree(lengths []uint8) (huffmanTree, os.Error) { // each symbol (consider reflecting a tree down the middle, for // example). Since the code length assignments determine the // efficiency of the tree, each of these trees is equally good. In - // order to minimise the amount of information needed to build a tree + // order to minimize the amount of information needed to build a tree // bzip2 uses a canonical tree so that it can be reconstructed given // only the code length assignments. diff --git a/src/pkg/compress/flate/deflate.go b/src/pkg/compress/flate/deflate.go index a02a5e8d9..b1cee0b2f 100644 --- a/src/pkg/compress/flate/deflate.go +++ b/src/pkg/compress/flate/deflate.go @@ -11,16 +11,18 @@ import ( ) const ( - NoCompression = 0 - BestSpeed = 1 - fastCompression = 3 - BestCompression = 9 - DefaultCompression = -1 - logMaxOffsetSize = 15 // Standard DEFLATE - wideLogMaxOffsetSize = 22 // Wide DEFLATE - minMatchLength = 3 // The smallest match that the compressor looks for - maxMatchLength = 258 // The longest match for the compressor - minOffsetSize = 1 // The shortest offset that makes any sence + NoCompression = 0 + BestSpeed = 1 + fastCompression = 3 + BestCompression = 9 + DefaultCompression = -1 + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 3 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sence // The maximum number of tokens we put into a single flat block, just too // stop things from getting too large. 
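The new constants replace the per-instance logWindowSize/windowMask fields: the window is fixed at 1<<15 bytes and windowMask exists for cheap modular indexing into the circular hash tables. A minimal standalone sketch of the identity this relies on (not from the patch; it only holds because windowSize is a power of two):

package main

import "fmt"

const (
	logWindowSize = 15
	windowSize    = 1 << logWindowSize
	windowMask    = windowSize - 1
)

func main() {
	// Because windowSize is a power of two, i&windowMask == i%windowSize,
	// so ring-buffer positions like hashPrev[i&windowMask] avoid a divide.
	for _, i := range []int{0, windowSize - 1, windowSize, 3*windowSize + 7} {
		fmt.Println(i&windowMask, i%windowSize) // the two columns always match
	}
}
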
@@ -32,22 +34,6 @@ const ( hashShift = (hashBits + minMatchLength - 1) / minMatchLength ) -type syncPipeReader struct { - *io.PipeReader - closeChan chan bool -} - -func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error { - retErr := sr.PipeReader.CloseWithError(err) - sr.closeChan <- true // finish writer close - return retErr -} - -type syncPipeWriter struct { - *io.PipeWriter - closeChan chan bool -} - type compressionLevel struct { good, lazy, nice, chain, fastSkipHashing int } @@ -68,105 +54,73 @@ var levels = []compressionLevel{ {32, 258, 258, 4096, math.MaxInt32}, } -func (sw *syncPipeWriter) Close() os.Error { - err := sw.PipeWriter.Close() - <-sw.closeChan // wait for reader close - return err -} - -func syncPipe() (*syncPipeReader, *syncPipeWriter) { - r, w := io.Pipe() - sr := &syncPipeReader{r, make(chan bool, 1)} - sw := &syncPipeWriter{w, sr.closeChan} - return sr, sw -} - type compressor struct { - level int - logWindowSize uint - w *huffmanBitWriter - r io.Reader - // (1 << logWindowSize) - 1. - windowMask int + compressionLevel - eof bool // has eof been reached on input? - sync bool // writer wants to flush - syncChan chan os.Error + w *huffmanBitWriter - // hashHead[hashValue] contains the largest inputIndex with the specified hash value - hashHead []int + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + sync bool // requesting flush + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value // If hashHead[hashValue] is within the current window, then // hashPrev[hashHead[hashValue] & windowMask] contains the previous index // with the same hash value. - hashPrev []int - - // If we find a match of length >= niceMatch, then we don't bother searching - // any further. - niceMatch int - - // If we find a match of length >= goodMatch, we only do a half-hearted - // effort at doing lazy matching starting at the next character - goodMatch int - - // The maximum number of chains we look at when finding a match - maxChainLength int - - // The sliding window we use for matching - window []byte - - // The index just past the last valid character - windowEnd int - - // index in "window" at which current block starts - blockStart int -} - -func (d *compressor) flush() os.Error { - d.w.flush() - return d.w.err + chainHead int + hashHead []int + hashPrev []int + + // input window: unprocessed data is window[index:windowEnd] + index int + window []byte + windowEnd int + blockStart int // window index where current tokens start + byteAvailable bool // if true, still need to process window[index-1]. 
+ + // queued output tokens: tokens[:ti] + tokens []token + ti int + + // deflate state + length int + offset int + hash int + maxInsertIndex int + err os.Error } -func (d *compressor) fillWindow(index int) (int, os.Error) { - if d.sync { - return index, nil - } - wSize := d.windowMask + 1 - if index >= wSize+wSize-(minMatchLength+maxMatchLength) { - // shift the window by wSize - copy(d.window, d.window[wSize:2*wSize]) - index -= wSize - d.windowEnd -= wSize - if d.blockStart >= wSize { - d.blockStart -= wSize +func (d *compressor) fillDeflate(b []byte) int { + if d.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window, d.window[windowSize:2*windowSize]) + d.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize } else { d.blockStart = math.MaxInt32 } for i, h := range d.hashHead { - v := h - wSize + v := h - windowSize if v < -1 { v = -1 } d.hashHead[i] = v } for i, h := range d.hashPrev { - v := -h - wSize + v := -h - windowSize if v < -1 { v = -1 } d.hashPrev[i] = v } } - count, err := d.r.Read(d.window[d.windowEnd:]) - d.windowEnd += count - if count == 0 && err == nil { - d.sync = true - } - if err == os.EOF { - d.eof = true - err = nil - } - return index, err + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n } func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error { @@ -194,21 +148,21 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // We quit when we get a match that's at least nice long nice := len(win) - pos - if d.niceMatch < nice { - nice = d.niceMatch + if d.nice < nice { + nice = d.nice } // If we've got a match that's good enough, only look in 1/4 the chain. - tries := d.maxChainLength + tries := d.chain length = prevLength - if length >= d.goodMatch { + if length >= d.good { tries >>= 2 } w0 := win[pos] w1 := win[pos+1] wEnd := win[pos+length] - minIndex := pos - (d.windowMask + 1) + minIndex := pos - windowSize for i := prevHead; tries > 0; tries-- { if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { @@ -233,7 +187,7 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead // hashPrev[i & windowMask] has already been overwritten, so stop now. 
break } - if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 { + if i = d.hashPrev[i&windowMask]; i < minIndex || i < 0 { break } } @@ -248,234 +202,224 @@ func (d *compressor) writeStoredBlock(buf []byte) os.Error { return d.w.err } -func (d *compressor) storedDeflate() os.Error { - buf := make([]byte, maxStoreBlockSize) - for { - n, err := d.r.Read(buf) - if n == 0 && err == nil { - d.sync = true - } - if n > 0 || d.sync { - if err := d.writeStoredBlock(buf[0:n]); err != nil { - return err - } - if d.sync { - d.syncChan <- nil - d.sync = false - } - } - if err != nil { - if err == os.EOF { - break - } - return err - } - } - return nil -} - -func (d *compressor) doDeflate() (err os.Error) { - // init - d.windowMask = 1<<d.logWindowSize - 1 +func (d *compressor) initDeflate() { d.hashHead = make([]int, hashSize) - d.hashPrev = make([]int, 1<<d.logWindowSize) - d.window = make([]byte, 2<<d.logWindowSize) + d.hashPrev = make([]int, windowSize) + d.window = make([]byte, 2*windowSize) fillInts(d.hashHead, -1) - tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) - l := levels[d.level] - d.goodMatch = l.good - d.niceMatch = l.nice - d.maxChainLength = l.chain - lazyMatch := l.lazy - length := minMatchLength - 1 - offset := 0 - byteAvailable := false - isFastDeflate := l.fastSkipHashing != 0 - index := 0 - // run - if index, err = d.fillWindow(index); err != nil { + d.tokens = make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) + d.length = minMatchLength - 1 + d.offset = 0 + d.byteAvailable = false + d.index = 0 + d.ti = 0 + d.hash = 0 + d.chainHead = -1 +} + +func (d *compressor) deflate() { + if d.windowEnd-d.index < minMatchLength+maxMatchLength && !d.sync { return } - maxOffset := d.windowMask + 1 // (1 << logWindowSize); - // only need to change when you refill the window - windowEnd := d.windowEnd - maxInsertIndex := windowEnd - (minMatchLength - 1) - ti := 0 - - hash := int(0) - if index < maxInsertIndex { - hash = int(d.window[index])<<hashShift + int(d.window[index+1]) + + d.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if d.index < d.maxInsertIndex { + d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1]) } - chainHead := -1 + Loop: for { - if index > windowEnd { + if d.index > d.windowEnd { panic("index > windowEnd") } - lookahead := windowEnd - index + lookahead := d.windowEnd - d.index if lookahead < minMatchLength+maxMatchLength { - if index, err = d.fillWindow(index); err != nil { - return + if !d.sync { + break Loop } - windowEnd = d.windowEnd - if index > windowEnd { + if d.index > d.windowEnd { panic("index > windowEnd") } - maxInsertIndex = windowEnd - (minMatchLength - 1) - lookahead = windowEnd - index if lookahead == 0 { // Flush current output block if any. - if byteAvailable { + if d.byteAvailable { // There is still one pending token that needs to be flushed - tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF) - ti++ - byteAvailable = false + d.tokens[d.ti] = literalToken(uint32(d.window[d.index-1])) + d.ti++ + d.byteAvailable = false } - if ti > 0 { - if err = d.writeBlock(tokens[0:ti], index, false); err != nil { + if d.ti > 0 { + if d.err = d.writeBlock(d.tokens[0:d.ti], d.index, false); d.err != nil { return } - ti = 0 - } - if d.sync { - d.w.writeStoredHeader(0, false) - d.w.flush() - d.syncChan <- d.w.err - d.sync = false - } - - // If this was only a sync (not at EOF) keep going. 
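For orientation, here is a hypothetical distillation (not in the patch) of the chain walk findMatch performs over these tables: hashHead[h] holds the newest window position whose three-byte prefix hashed to h, and hashPrev[i&windowMask] links back to the next older position with the same hash, so candidates are visited newest-first. The real code additionally compares bytes at each candidate and quits early on a nice-enough match.

package main

import "fmt"

const windowMask = 1<<15 - 1

// walkChain visits match candidates newest-first. The walk stops at the
// window edge (minIndex), at a -1 "no predecessor" link, or when the
// per-level chain budget (tries) is spent.
func walkChain(hashHead, hashPrev []int, h, minIndex, tries int, visit func(pos int)) {
	for i := hashHead[h]; tries > 0 && i >= 0 && i >= minIndex; tries-- {
		visit(i)
		i = hashPrev[i&windowMask]
	}
}

func main() {
	hashHead := make([]int, 8)
	for i := range hashHead {
		hashHead[i] = -1 // empty chains, as fillInts(d.hashHead, -1) does
	}
	hashHead[2] = 9 // hash value 2 was last seen at window position 9
	hashPrev := make([]int, windowMask+1)
	hashPrev[9], hashPrev[5] = 5, -1 // chain: 9 -> 5 -> end
	walkChain(hashHead, hashPrev, 2, 0, 8, func(pos int) { fmt.Println(pos) })
}
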
- if !d.eof { - continue + d.ti = 0 } break Loop } } - if index < maxInsertIndex { + if d.index < d.maxInsertIndex { // Update the hash - hash = (hash<<hashShift + int(d.window[index+2])) & hashMask - chainHead = d.hashHead[hash] - d.hashPrev[index&d.windowMask] = chainHead - d.hashHead[hash] = index + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask + d.chainHead = d.hashHead[d.hash] + d.hashPrev[d.index&windowMask] = d.chainHead + d.hashHead[d.hash] = d.index } - prevLength := length - prevOffset := offset - length = minMatchLength - 1 - offset = 0 - minIndex := index - maxOffset + prevLength := d.length + prevOffset := d.offset + d.length = minMatchLength - 1 + d.offset = 0 + minIndex := d.index - windowSize if minIndex < 0 { minIndex = 0 } - if chainHead >= minIndex && - (isFastDeflate && lookahead > minMatchLength-1 || - !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) { - if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok { - length = newLength - offset = newOffset + if d.chainHead >= minIndex && + (d.fastSkipHashing != 0 && lookahead > minMatchLength-1 || + d.fastSkipHashing == 0 && lookahead > prevLength && prevLength < d.lazy) { + if newLength, newOffset, ok := d.findMatch(d.index, d.chainHead, minMatchLength-1, lookahead); ok { + d.length = newLength + d.offset = newOffset } } - if isFastDeflate && length >= minMatchLength || - !isFastDeflate && prevLength >= minMatchLength && length <= prevLength { + if d.fastSkipHashing != 0 && d.length >= minMatchLength || + d.fastSkipHashing == 0 && prevLength >= minMatchLength && d.length <= prevLength { // There was a match at the previous step, and the current match is // not better. Output the previous match. - if isFastDeflate { - tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize)) + if d.fastSkipHashing != 0 { + d.tokens[d.ti] = matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)) } else { - tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) + d.tokens[d.ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) } - ti++ + d.ti++ // Insert in the hash table all strings up to the end of the match. // index and index-1 are already inserted. If there is not enough // lookahead, the last two strings are not inserted into the hash // table. - if length <= l.fastSkipHashing { + if d.length <= d.fastSkipHashing { var newIndex int - if isFastDeflate { - newIndex = index + length + if d.fastSkipHashing != 0 { + newIndex = d.index + d.length } else { newIndex = prevLength - 1 } - for index++; index < newIndex; index++ { - if index < maxInsertIndex { - hash = (hash<<hashShift + int(d.window[index+2])) & hashMask + for d.index++; d.index < newIndex; d.index++ { + if d.index < d.maxInsertIndex { + d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask // Get previous value with the same hash. // Our chain should point to the previous value. - d.hashPrev[index&d.windowMask] = d.hashHead[hash] + d.hashPrev[d.index&windowMask] = d.hashHead[d.hash] // Set the head of the hash chain to us. - d.hashHead[hash] = index + d.hashHead[d.hash] = d.index } } - if !isFastDeflate { - byteAvailable = false - length = minMatchLength - 1 + if d.fastSkipHashing == 0 { + d.byteAvailable = false + d.length = minMatchLength - 1 } } else { // For matches this long, we don't bother inserting each individual // item into the table. 
- index += length - hash = (int(d.window[index])<<hashShift + int(d.window[index+1])) + d.index += d.length + d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1])) } - if ti == maxFlateBlockTokens { + if d.ti == maxFlateBlockTokens { // The block includes the current character - if err = d.writeBlock(tokens, index, false); err != nil { + if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil { return } - ti = 0 + d.ti = 0 } } else { - if isFastDeflate || byteAvailable { - i := index - 1 - if isFastDeflate { - i = index + if d.fastSkipHashing != 0 || d.byteAvailable { + i := d.index - 1 + if d.fastSkipHashing != 0 { + i = d.index } - tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF) - ti++ - if ti == maxFlateBlockTokens { - if err = d.writeBlock(tokens, i+1, false); err != nil { + d.tokens[d.ti] = literalToken(uint32(d.window[i])) + d.ti++ + if d.ti == maxFlateBlockTokens { + if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil { return } - ti = 0 + d.ti = 0 } } - index++ - if !isFastDeflate { - byteAvailable = true + d.index++ + if d.fastSkipHashing == 0 { + d.byteAvailable = true } } } - return } -func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) { - d.r = r +func (d *compressor) fillStore(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) store() { + if d.windowEnd > 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } + d.windowEnd = 0 +} + +func (d *compressor) write(b []byte) (n int, err os.Error) { + n = len(b) + b = b[d.fill(d, b):] + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + } + return n, d.err +} + +func (d *compressor) syncFlush() os.Error { + d.sync = true + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err os.Error) { d.w = newHuffmanBitWriter(w) - d.level = level - d.logWindowSize = logWindowSize switch { case level == NoCompression: - err = d.storedDeflate() + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillStore + d.step = (*compressor).store case level == DefaultCompression: - d.level = 6 + level = 6 fallthrough case 1 <= level && level <= 9: - err = d.doDeflate() + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflate default: return WrongValueError{"level", 0, 9, int32(level)} } + return nil +} - if d.sync { - d.syncChan <- err - d.sync = false - } - if err != nil { - return err +func (d *compressor) close() os.Error { + d.sync = true + d.step(d) + if d.err != nil { + return d.err } if d.w.writeStoredHeader(0, true); d.w.err != nil { return d.w.err } - return d.flush() + d.w.flush() + return d.w.err } // NewWriter returns a new Writer compressing @@ -486,14 +430,9 @@ func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize // compression; it only adds the necessary DEFLATE framing. 
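The net effect of this hunk is that the compressor becomes an ordinary synchronous state machine: init installs a fill function (copy caller data into the window) and a step function (consume the window) as method expressions, and write, syncFlush, and close just drive them. A minimal sketch of that dispatch pattern, with hypothetical names:

package main

import "fmt"

type machine struct {
	window    []byte
	windowEnd int
	fill      func(*machine, []byte) int // copy input into the window
	step      func(*machine)             // consume the window
}

func (m *machine) fillStore(b []byte) int {
	n := copy(m.window[m.windowEnd:], b)
	m.windowEnd += n
	return n
}

func (m *machine) store() {
	fmt.Printf("flush %q\n", m.window[:m.windowEnd])
	m.windowEnd = 0
}

func (m *machine) write(b []byte) int {
	n := len(b)
	// Keep filling and stepping until the caller's data is consumed;
	// a final partial window stays buffered until close, as in deflate.
	for b = b[m.fill(m, b):]; len(b) > 0; b = b[m.fill(m, b):] {
		m.step(m)
	}
	return n
}

func main() {
	m := &machine{window: make([]byte, 4)}
	m.fill = (*machine).fillStore // method expressions as a tiny vtable
	m.step = (*machine).store
	m.write([]byte("hello world"))
	m.step(m) // close would perform this final flush
}

With the goroutine and pipe gone, Flush is a plain method call: syncFlush compresses whatever is buffered and then emits an empty stored block via writeStoredHeader(0, false), the usual DEFLATE sync marker. A round trip in the pre-Go 1 API this tree defines (NewWriter here returns only a *Writer):

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
)

func main() {
	var buf bytes.Buffer
	w := flate.NewWriter(&buf, flate.BestSpeed)
	w.Write([]byte("hello, "))
	w.Flush() // synchronous: pending data, then an empty stored block
	w.Write([]byte("flate"))
	w.Close()

	r := flate.NewReader(&buf)
	defer r.Close()
	out, _ := ioutil.ReadAll(r)
	fmt.Printf("%s\n", out) // hello, flate
}
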
func NewWriter(w io.Writer, level int) *Writer { const logWindowSize = logMaxOffsetSize - var d compressor - d.syncChan = make(chan os.Error, 1) - pr, pw := syncPipe() - go func() { - err := d.compress(pr, w, level, logWindowSize) - pr.CloseWithError(err) - }() - return &Writer{pw, &d} + var dw Writer + dw.d.init(w, level) + return &dw } // NewWriterDict is like NewWriter but initializes the new @@ -526,18 +465,13 @@ func (w *dictWriter) Write(b []byte) (n int, err os.Error) { // A Writer takes data written to it and writes the compressed // form of that data to an underlying writer (see NewWriter). type Writer struct { - w *syncPipeWriter - d *compressor + d compressor } // Write writes data to w, which will eventually write the // compressed form of data to its underlying writer. func (w *Writer) Write(data []byte) (n int, err os.Error) { - if len(data) == 0 { - // no point, and nil interferes with sync - return - } - return w.w.Write(data) + return w.d.write(data) } // Flush flushes any pending compressed data to the underlying writer. @@ -550,18 +484,10 @@ func (w *Writer) Write(data []byte) (n int, err os.Error) { func (w *Writer) Flush() os.Error { // For more about flushing: // http://www.bolet.org/~pornin/deflate-flush.html - if w.d.sync { - panic("compress/flate: double Flush") - } - _, err := w.w.Write(nil) - err1 := <-w.d.syncChan - if err == nil { - err = err1 - } - return err + return w.d.syncFlush() } // Close flushes and closes the writer. func (w *Writer) Close() os.Error { - return w.w.Close() + return w.d.close() } diff --git a/src/pkg/compress/flate/deflate_test.go b/src/pkg/compress/flate/deflate_test.go index 650a8059a..2ac811c38 100644 --- a/src/pkg/compress/flate/deflate_test.go +++ b/src/pkg/compress/flate/deflate_test.go @@ -57,7 +57,7 @@ var deflateInflateTests = []*deflateInflateTest{ &deflateInflateTest{[]byte{0x11, 0x12}}, &deflateInflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, &deflateInflateTest{[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, - &deflateInflateTest{getLargeDataChunk()}, + &deflateInflateTest{largeDataChunk()}, } var reverseBitsTests = []*reverseBitsTest{ @@ -71,23 +71,22 @@ var reverseBitsTests = []*reverseBitsTest{ &reverseBitsTest{29, 5, 23}, } -func getLargeDataChunk() []byte { +func largeDataChunk() []byte { result := make([]byte, 100000) for i := range result { - result[i] = byte(int64(i) * int64(i) & 0xFF) + result[i] = byte(i * i & 0xFF) } return result } func TestDeflate(t *testing.T) { for _, h := range deflateTests { - buffer := bytes.NewBuffer(nil) - w := NewWriter(buffer, h.level) + var buf bytes.Buffer + w := NewWriter(&buf, h.level) w.Write(h.in) w.Close() - if bytes.Compare(buffer.Bytes(), h.out) != 0 { - t.Errorf("buffer is wrong; level = %v, buffer.Bytes() = %v, expected output = %v", - h.level, buffer.Bytes(), h.out) + if !bytes.Equal(buf.Bytes(), h.out) { + t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out) } } } diff --git a/src/pkg/compress/flate/huffman_bit_writer.go b/src/pkg/compress/flate/huffman_bit_writer.go index abff82dd6..3981df5cb 100644 --- a/src/pkg/compress/flate/huffman_bit_writer.go +++ b/src/pkg/compress/flate/huffman_bit_writer.go @@ -15,9 +15,6 @@ const ( // The largest offset code. offsetCodeCount = 30 - // The largest offset code in the extensions. - extendedOffsetCodeCount = 42 - // The special code used to mark the end of a block. 
endBlockMarker = 256 @@ -100,11 +97,11 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { return &huffmanBitWriter{ w: w, literalFreq: make([]int32, maxLit), - offsetFreq: make([]int32, extendedOffsetCodeCount), - codegen: make([]uint8, maxLit+extendedOffsetCodeCount+1), + offsetFreq: make([]int32, offsetCodeCount), + codegen: make([]uint8, maxLit+offsetCodeCount+1), codegenFreq: make([]int32, codegenCodeCount), literalEncoding: newHuffmanEncoder(maxLit), - offsetEncoding: newHuffmanEncoder(extendedOffsetCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), codegenEncoding: newHuffmanEncoder(codegenCodeCount), } } @@ -185,7 +182,7 @@ func (w *huffmanBitWriter) writeBytes(bytes []byte) { _, w.err = w.w.Write(bytes) } -// RFC 1951 3.2.7 specifies a special run-length encoding for specifiying +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying // the literal and offset lengths arrays (which are concatenated into a single // array). This method generates that run-length encoding. // @@ -279,7 +276,7 @@ func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) { // // numLiterals The number of literals specified in codegen // numOffsets The number of offsets specified in codegen -// numCodegens Tne number of codegens used in codegen +// numCodegens The number of codegens used in codegen func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { if w.err != nil { return @@ -290,13 +287,7 @@ func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, n } w.writeBits(firstBits, 3) w.writeBits(int32(numLiterals-257), 5) - if numOffsets > offsetCodeCount { - // Extended version of decompressor - w.writeBits(int32(offsetCodeCount+((numOffsets-(1+offsetCodeCount))>>3)), 5) - w.writeBits(int32((numOffsets-(1+offsetCodeCount))&0x7), 3) - } else { - w.writeBits(int32(numOffsets-1), 5) - } + w.writeBits(int32(numOffsets-1), 5) w.writeBits(int32(numCodegens-4), 4) for i := 0; i < numCodegens; i++ { @@ -368,24 +359,17 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { tokens = tokens[0 : n+1] tokens[n] = endBlockMarker - totalLength := -1 // Subtract 1 for endBlock. for _, t := range tokens { switch t.typ() { case literalType: w.literalFreq[t.literal()]++ - totalLength++ - break case matchType: length := t.length() offset := t.offset() - totalLength += int(length + 3) w.literalFreq[lengthCodesStart+lengthCode(length)]++ w.offsetFreq[offsetCode(offset)]++ - break } } - w.literalEncoding.generate(w.literalFreq, 15) - w.offsetEncoding.generate(w.offsetFreq, 15) // get the number of literals numLiterals := len(w.literalFreq) @@ -394,15 +378,25 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { } // get the number of offsets numOffsets := len(w.offsetFreq) - for numOffsets > 1 && w.offsetFreq[numOffsets-1] == 0 { + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { numOffsets-- } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. 
+ w.offsetFreq[0] = 1 + numOffsets = 1 + } + + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + storedBytes := 0 if input != nil { storedBytes = len(input) } var extraBits int64 - var storedSize int64 + var storedSize int64 = math.MaxInt64 if storedBytes <= maxStoreBlockSize && input != nil { storedSize = int64((storedBytes + 5) * 8) // We only bother calculating the costs of the extra bits required by @@ -417,34 +411,29 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { // First four offset codes have extra size = 0. extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) } - } else { - storedSize = math.MaxInt32 } - // Figure out which generates smaller code, fixed Huffman, dynamic - // Huffman, or just storing the data. - var fixedSize int64 = math.MaxInt64 - if numOffsets <= offsetCodeCount { - fixedSize = int64(3) + - fixedLiteralEncoding.bitLength(w.literalFreq) + - fixedOffsetEncoding.bitLength(w.offsetFreq) + - extraBits - } + // Figure out smallest code. + // Fixed Huffman baseline. + var size = int64(3) + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + + // Dynamic Huffman? + var numCodegens int + // Generate codegen and codegenFrequencies, which indicates how to encode // the literalEncoding and the offsetEncoding. w.generateCodegen(numLiterals, numOffsets) w.codegenEncoding.generate(w.codegenFreq, 7) - numCodegens := len(w.codegenFreq) + numCodegens = len(w.codegenFreq) for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { numCodegens-- } - extensionSummand := 0 - if numOffsets > offsetCodeCount { - extensionSummand = 3 - } dynamicHeader := int64(3+5+5+4+(3*numCodegens)) + - // Following line is an extension. - int64(extensionSummand) + w.codegenEncoding.bitLength(w.codegenFreq) + int64(extraBits) + int64(w.codegenFreq[16]*2) + @@ -454,26 +443,25 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { w.literalEncoding.bitLength(w.literalFreq) + w.offsetEncoding.bitLength(w.offsetFreq) - if storedSize < fixedSize && storedSize < dynamicSize { + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storedSize < size { w.writeStoredHeader(storedBytes, eof) w.writeBytes(input[0:storedBytes]) return } - var literalEncoding *huffmanEncoder - var offsetEncoding *huffmanEncoder - if fixedSize <= dynamicSize { + // Huffman. + if literalEncoding == fixedLiteralEncoding { w.writeFixedHeader(eof) - literalEncoding = fixedLiteralEncoding - offsetEncoding = fixedOffsetEncoding } else { - // Write the header. w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) - literalEncoding = w.literalEncoding - offsetEncoding = w.offsetEncoding } - - // Write the tokens. 
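The rewritten comparison folds the three candidates into one running minimum: fixed Huffman is the baseline, dynamic Huffman must strictly beat it, and a stored block must strictly beat whichever Huffman form won (storedSize starts at math.MaxInt64, so an unstorable block can never win). A standalone distillation of that decision order, with hypothetical names:

package main

import "fmt"

// chooseEncoding mirrors the decision order in writeBlock above.
func chooseEncoding(storedSize, fixedSize, dynamicSize int64) string {
	size, enc := fixedSize, "fixed"
	if dynamicSize < size {
		size, enc = dynamicSize, "dynamic"
	}
	if storedSize < size {
		enc = "stored"
	}
	return enc
}

func main() {
	fmt.Println(chooseEncoding(800, 1000, 900))  // stored
	fmt.Println(chooseEncoding(5000, 1000, 900)) // dynamic
}
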
for _, t := range tokens { switch t.typ() { case literalType: diff --git a/src/pkg/compress/flate/huffman_code.go b/src/pkg/compress/flate/huffman_code.go index 6be605f0a..7ed603a4f 100644 --- a/src/pkg/compress/flate/huffman_code.go +++ b/src/pkg/compress/flate/huffman_code.go @@ -363,7 +363,12 @@ func (s literalNodeSorter) Less(i, j int) bool { func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } func sortByFreq(a []literalNode) { - s := &literalNodeSorter{a, func(i, j int) bool { return a[i].freq < a[j].freq }} + s := &literalNodeSorter{a, func(i, j int) bool { + if a[i].freq == a[j].freq { + return a[i].literal < a[j].literal + } + return a[i].freq < a[j].freq + }} sort.Sort(s) } diff --git a/src/pkg/compress/flate/inflate.go b/src/pkg/compress/flate/inflate.go index 320b80d06..3845f1204 100644 --- a/src/pkg/compress/flate/inflate.go +++ b/src/pkg/compress/flate/inflate.go @@ -77,8 +77,6 @@ type huffmanDecoder struct { // Initialize Huffman decoding tables from array of code lengths. func (h *huffmanDecoder) init(bits []int) bool { - // TODO(rsc): Return false sometimes. - // Count number of codes of each length, // compute min and max length. var count [maxCodeLen + 1]int @@ -197,9 +195,8 @@ type Reader interface { // Decompress state. type decompressor struct { - // Input/output sources. + // Input source. r Reader - w io.Writer roffset int64 woffset int64 @@ -222,38 +219,79 @@ type decompressor struct { // Temporary buffer (avoids repeated allocation). buf [4]byte + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + final bool + err os.Error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int } -func (f *decompressor) inflate() (err os.Error) { - final := false - for err == nil && !final { - for f.nb < 1+2 { - if err = f.moreBits(); err != nil { - return - } +func (f *decompressor) nextBlock() { + if f.final { + if f.hw != f.hp { + f.flush((*decompressor).nextBlock) + return } - final = f.b&1 == 1 - f.b >>= 1 - typ := f.b & 3 - f.b >>= 2 - f.nb -= 1 + 2 - switch typ { - case 0: - err = f.dataBlock() - case 1: - // compressed, fixed Huffman tables - err = f.decodeBlock(&fixedHuffmanDecoder, nil) - case 2: - // compressed, dynamic Huffman tables - if err = f.readHuffman(); err == nil { - err = f.decodeBlock(&f.h1, &f.h2) - } - default: - // 3 is reserved. - err = CorruptInputError(f.roffset) + f.err = os.EOF + return + } + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlock() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlock() + default: + // 3 is reserved. + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, os.Error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + return n, nil + } + if f.err != nil { + return 0, f.err } + f.step(f) } - return + panic("unreachable") +} + +func (f *decompressor) Close() os.Error { + if f.err == os.EOF { + return nil + } + return f.err } // RFC 1951 section 3.2.7. @@ -358,11 +396,12 @@ func (f *decompressor) readHuffman() os.Error { // hl and hd are the Huffman states for the lit/length values // and the distance values, respectively. 
If hd == nil, using the // fixed distance encoding associated with fixed Huffman blocks. -func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { +func (f *decompressor) huffmanBlock() { for { - v, err := f.huffSym(hl) + v, err := f.huffSym(f.hl) if err != nil { - return err + f.err = err + return } var n uint // number of bits extra var length int @@ -371,13 +410,15 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { f.hist[f.hp] = byte(v) f.hp++ if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + // After the flush, continue this loop. + f.flush((*decompressor).huffmanBlock) + return } continue case v == 256: - return nil + // Done with huffman block; read next block. + f.step = (*decompressor).nextBlock + return // otherwise, reference to older data case v < 265: length = v - (257 - 3) @@ -404,7 +445,8 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { if n > 0 { for f.nb < n { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } length += int(f.b & uint32(1<<n-1)) @@ -413,18 +455,20 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { } var dist int - if hd == nil { + if f.hd == nil { for f.nb < 5 { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } dist = int(reverseByte[(f.b&0x1F)<<3]) f.b >>= 5 f.nb -= 5 } else { - if dist, err = f.huffSym(hd); err != nil { - return err + if dist, err = f.huffSym(f.hd); err != nil { + f.err = err + return } } @@ -432,14 +476,16 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { case dist < 4: dist++ case dist >= 30: - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return default: nb := uint(dist-2) >> 1 // have 1 bit in bottom of dist, need nb more. extra := (dist & 1) << nb for f.nb < nb { if err = f.moreBits(); err != nil { - return err + f.err = err + return } } extra |= int(f.b & uint32(1<<nb-1)) @@ -450,12 +496,14 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { // Copy history[-dist:-dist+length] into output. if dist > len(f.hist) { - return InternalError("bad history distance") + f.err = InternalError("bad history distance") + return } // No check on length; encoding can be prescient. if !f.hfull && dist > f.hp { - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return } p := f.hp - dist @@ -467,9 +515,11 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { f.hp++ p++ if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + // After flush continue copying out of history. + f.copyLen = length - (i + 1) + f.copyDist = dist + f.flush((*decompressor).copyHuff) + return } if p == len(f.hist) { p = 0 @@ -479,8 +529,33 @@ func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { panic("unreached") } +func (f *decompressor) copyHuff() { + length := f.copyLen + dist := f.copyDist + p := f.hp - dist + if p < 0 { + p += len(f.hist) + } + for i := 0; i < length; i++ { + f.hist[f.hp] = f.hist[p] + f.hp++ + p++ + if f.hp == len(f.hist) { + f.copyLen = length - (i + 1) + f.flush((*decompressor).copyHuff) + return + } + if p == len(f.hist) { + p = 0 + } + } + + // Continue processing Huffman block. + f.huffmanBlock() +} + // Copy a single uncompressed data block from input to output. -func (f *decompressor) dataBlock() os.Error { +func (f *decompressor) dataBlock() { // Uncompressed. // Discard current half-byte. 
f.nb = 0 @@ -490,21 +565,30 @@ func (f *decompressor) dataBlock() os.Error { nr, err := io.ReadFull(f.r, f.buf[0:4]) f.roffset += int64(nr) if err != nil { - return &ReadError{f.roffset, err} + f.err = &ReadError{f.roffset, err} + return } n := int(f.buf[0]) | int(f.buf[1])<<8 nn := int(f.buf[2]) | int(f.buf[3])<<8 if uint16(nn) != uint16(^n) { - return CorruptInputError(f.roffset) + f.err = CorruptInputError(f.roffset) + return } if n == 0 { // 0-length block means sync - return f.flush() + f.flush((*decompressor).nextBlock) + return } - // Read len bytes into history, - // writing as history fills. + f.copyLen = n + f.copyData() +} + +func (f *decompressor) copyData() { + // Read f.dataLen bytes into history, + // pausing for reads as history fills. + n := f.copyLen for n > 0 { m := len(f.hist) - f.hp if m > n { @@ -513,17 +597,18 @@ func (f *decompressor) dataBlock() os.Error { m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) f.roffset += int64(m) if err != nil { - return &ReadError{f.roffset, err} + f.err = &ReadError{f.roffset, err} + return } n -= m f.hp += m if f.hp == len(f.hist) { - if err = f.flush(); err != nil { - return err - } + f.copyLen = n + f.flush((*decompressor).copyData) + return } } - return nil + f.step = (*decompressor).nextBlock } func (f *decompressor) setDict(dict []byte) { @@ -579,17 +664,8 @@ func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) { } // Flush any buffered output to the underlying writer. -func (f *decompressor) flush() os.Error { - if f.hw == f.hp { - return nil - } - n, err := f.w.Write(f.hist[f.hw:f.hp]) - if n != f.hp-f.hw && err == nil { - err = io.ErrShortWrite - } - if err != nil { - return &WriteError{f.woffset, err} - } +func (f *decompressor) flush(step func(*decompressor)) { + f.toRead = f.hist[f.hw:f.hp] f.woffset += int64(f.hp - f.hw) f.hw = f.hp if f.hp == len(f.hist) { @@ -597,7 +673,7 @@ func (f *decompressor) flush() os.Error { f.hw = 0 f.hfull = true } - return nil + f.step = step } func makeReader(r io.Reader) Reader { @@ -607,30 +683,15 @@ func makeReader(r io.Reader) Reader { return bufio.NewReader(r) } -// decompress reads DEFLATE-compressed data from r and writes -// the uncompressed data to w. -func (f *decompressor) decompress(r io.Reader, w io.Writer) os.Error { - f.r = makeReader(r) - f.w = w - f.woffset = 0 - if err := f.inflate(); err != nil { - return err - } - if err := f.flush(); err != nil { - return err - } - return nil -} - // NewReader returns a new ReadCloser that can be used // to read the uncompressed version of r. It is the caller's // responsibility to call Close on the ReadCloser when // finished reading. 
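Note that flush no longer writes to a pipe: it parks the window bytes in toRead and records which function to resume, and Read drives step until output appears or the error becomes sticky. A standalone sketch of that continuation pattern (hypothetical names, pre-Go 1 error types):

package main

import (
	"fmt"
	"io/ioutil"
	"os"
)

type dec struct {
	step   func(*dec) // continuation: what to run next
	toRead []byte     // output parked for Read to return
	err    os.Error   // sticky error; os.EOF ends the stream
	n      int
}

func (d *dec) produce() {
	if d.n == 3 {
		d.err = os.EOF
		return
	}
	d.n++
	// Park some output and name the continuation, as flush(step) does.
	d.toRead = []byte(fmt.Sprintf("block %d\n", d.n))
	d.step = (*dec).produce
}

func (d *dec) Read(b []byte) (int, os.Error) {
	for {
		if len(d.toRead) > 0 {
			n := copy(b, d.toRead)
			d.toRead = d.toRead[n:]
			return n, nil
		}
		if d.err != nil {
			return 0, d.err
		}
		d.step(d)
	}
	panic("unreachable")
}

func main() {
	d := &dec{step: (*dec).produce}
	out, _ := ioutil.ReadAll(d) // *dec is an io.Reader now
	fmt.Print(string(out))
}
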
func NewReader(r io.Reader) io.ReadCloser { var f decompressor - pr, pw := io.Pipe() - go func() { pw.CloseWithError(f.decompress(r, pw)) }() - return pr + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f } // NewReaderDict is like NewReader but initializes the reader @@ -641,7 +702,7 @@ func NewReader(r io.Reader) io.ReadCloser { func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { var f decompressor f.setDict(dict) - pr, pw := io.Pipe() - go func() { pw.CloseWithError(f.decompress(r, pw)) }() - return pr + f.r = makeReader(r) + f.step = (*decompressor).nextBlock + return &f } diff --git a/src/pkg/compress/gzip/gzip_test.go b/src/pkg/compress/gzip/gzip_test.go index 23f351405..121e627e6 100644 --- a/src/pkg/compress/gzip/gzip_test.go +++ b/src/pkg/compress/gzip/gzip_test.go @@ -11,7 +11,7 @@ import ( ) // pipe creates two ends of a pipe that gzip and gunzip, and runs dfunc at the -// writer end and ifunc at the reader end. +// writer end and cfunc at the reader end. func pipe(t *testing.T, dfunc func(*Compressor), cfunc func(*Decompressor)) { piper, pipew := io.Pipe() defer piper.Close() diff --git a/src/pkg/compress/lzw/reader.go b/src/pkg/compress/lzw/reader.go index d418bc856..21231c8e5 100644 --- a/src/pkg/compress/lzw/reader.go +++ b/src/pkg/compress/lzw/reader.go @@ -32,13 +32,49 @@ const ( MSB ) +const ( + maxWidth = 12 + decoderInvalidCode = 0xffff + flushBuffer = 1 << maxWidth +) + // decoder is the state from which the readXxx method converts a byte // stream into a code stream. type decoder struct { - r io.ByteReader - bits uint32 - nBits uint - width uint + r io.ByteReader + bits uint32 + nBits uint + width uint + read func(*decoder) (uint16, os.Error) // readLSB or readMSB + litWidth int // width in bits of literal codes + err os.Error + + // The first 1<<litWidth codes are literal codes. + // The next two codes mean clear and EOF. + // Other valid codes are in the range [lo, hi] where lo := clear + 2, + // with the upper bound incrementing on each code seen. + // overflow is the code at which hi overflows the code width. + // last is the most recently seen code, or decoderInvalidCode. + clear, eof, hi, overflow, last uint16 + + // Each code c in [lo, hi] expands to two or more bytes. For c != hi: + // suffix[c] is the last of these bytes. + // prefix[c] is the code for all but the last byte. + // This code can either be a literal code or another code in [lo, c). + // The c == hi case is a special case. + suffix [1 << maxWidth]uint8 + prefix [1 << maxWidth]uint16 + + // output is the temporary output buffer. + // Literal codes are accumulated from the start of the buffer. + // Non-literal codes decode to a sequence of suffixes that are first + // written right-to-left from the end of the buffer before being copied + // to the start of the buffer. + // It is flushed when it contains >= 1<<maxWidth bytes, + // so that there is always room to decode an entire code. + output [2 * 1 << maxWidth]byte + o int // write index into output + toRead []byte // bytes to return from Read } // readLSB returns the next code for "Least Significant Bits first" data. @@ -73,116 +109,113 @@ func (d *decoder) readMSB() (uint16, os.Error) { return code, nil } -// decode decompresses bytes from r and writes them to pw. -// read specifies how to decode bytes into codes. -// litWidth is the width in bits of literal codes. 
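The rewritten LZW decoder likewise satisfies Read directly, accumulating expansions in its output buffer instead of writing to a pipe; usage is unchanged. A round trip in the style of this tree (lzw.NewWriter's signature is assumed from the same era of the package, since this diff does not show it):

package main

import (
	"bytes"
	"compress/lzw"
	"fmt"
	"io/ioutil"
)

func main() {
	var buf bytes.Buffer

	// LSB bit order with 8-bit literals is the GIF-style configuration.
	w := lzw.NewWriter(&buf, lzw.LSB, 8)
	w.Write([]byte("TOBEORNOTTOBEORTOBEORNOT"))
	w.Close()

	r := lzw.NewReader(&buf, lzw.LSB, 8)
	defer r.Close()
	out, err := ioutil.ReadAll(r)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("%s\n", out)
}
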
-func decode(r io.Reader, read func(*decoder) (uint16, os.Error), litWidth int, pw *io.PipeWriter) { - br, ok := r.(io.ByteReader) - if !ok { - br = bufio.NewReader(r) +func (d *decoder) Read(b []byte) (int, os.Error) { + for { + if len(d.toRead) > 0 { + n := copy(b, d.toRead) + d.toRead = d.toRead[n:] + return n, nil + } + if d.err != nil { + return 0, d.err + } + d.decode() } - pw.CloseWithError(decode1(pw, br, read, uint(litWidth))) + panic("unreachable") } -func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os.Error), litWidth uint) os.Error { - const ( - maxWidth = 12 - invalidCode = 0xffff - ) - d := decoder{r, 0, 0, 1 + litWidth} - w := bufio.NewWriter(pw) - // The first 1<<litWidth codes are literal codes. - // The next two codes mean clear and EOF. - // Other valid codes are in the range [lo, hi] where lo := clear + 2, - // with the upper bound incrementing on each code seen. - clear := uint16(1) << litWidth - eof, hi := clear+1, clear+1 - // overflow is the code at which hi overflows the code width. - overflow := uint16(1) << d.width - var ( - // Each code c in [lo, hi] expands to two or more bytes. For c != hi: - // suffix[c] is the last of these bytes. - // prefix[c] is the code for all but the last byte. - // This code can either be a literal code or another code in [lo, c). - // The c == hi case is a special case. - suffix [1 << maxWidth]uint8 - prefix [1 << maxWidth]uint16 - // buf is a scratch buffer for reconstituting the bytes that a code expands to. - // Code suffixes are written right-to-left from the end of the buffer. - buf [1 << maxWidth]byte - ) - +// decode decompresses bytes from r and leaves them in d.toRead. +// read specifies how to decode bytes into codes. +// litWidth is the width in bits of literal codes. +func (d *decoder) decode() { // Loop over the code stream, converting codes into decompressed bytes. - last := uint16(invalidCode) for { - code, err := read(&d) + code, err := d.read(d) if err != nil { if err == os.EOF { err = io.ErrUnexpectedEOF } - return err + d.err = err + return } switch { - case code < clear: + case code < d.clear: // We have a literal code. - if err := w.WriteByte(uint8(code)); err != nil { - return err - } - if last != invalidCode { + d.output[d.o] = uint8(code) + d.o++ + if d.last != decoderInvalidCode { // Save what the hi code expands to. - suffix[hi] = uint8(code) - prefix[hi] = last + d.suffix[d.hi] = uint8(code) + d.prefix[d.hi] = d.last } - case code == clear: - d.width = 1 + litWidth - hi = eof - overflow = 1 << d.width - last = invalidCode + case code == d.clear: + d.width = 1 + uint(d.litWidth) + d.hi = d.eof + d.overflow = 1 << d.width + d.last = decoderInvalidCode continue - case code == eof: - return w.Flush() - case code <= hi: - c, i := code, len(buf)-1 - if code == hi { + case code == d.eof: + d.flush() + d.err = os.EOF + return + case code <= d.hi: + c, i := code, len(d.output)-1 + if code == d.hi { // code == hi is a special case which expands to the last expansion // followed by the head of the last expansion. To find the head, we walk // the prefix chain until we find a literal code. - c = last - for c >= clear { - c = prefix[c] + c = d.last + for c >= d.clear { + c = d.prefix[c] } - buf[i] = uint8(c) + d.output[i] = uint8(c) i-- - c = last + c = d.last } - // Copy the suffix chain into buf and then write that to w. - for c >= clear { - buf[i] = suffix[c] + // Copy the suffix chain into output and then write that to w. 
+ for c >= d.clear { + d.output[i] = d.suffix[c] i-- - c = prefix[c] + c = d.prefix[c] } - buf[i] = uint8(c) - if _, err := w.Write(buf[i:]); err != nil { - return err + d.output[i] = uint8(c) + d.o += copy(d.output[d.o:], d.output[i:]) + if d.last != decoderInvalidCode { + // Save what the hi code expands to. + d.suffix[d.hi] = uint8(c) + d.prefix[d.hi] = d.last } - // Save what the hi code expands to. - suffix[hi] = uint8(c) - prefix[hi] = last default: - return os.NewError("lzw: invalid code") + d.err = os.NewError("lzw: invalid code") + return } - last, hi = code, hi+1 - if hi == overflow { + d.last, d.hi = code, d.hi+1 + if d.hi >= d.overflow { if d.width == maxWidth { - return os.NewError("lzw: missing clear code") + d.last = decoderInvalidCode + } else { + d.width++ + d.overflow <<= 1 } - d.width++ - overflow <<= 1 + } + if d.o >= flushBuffer { + d.flush() + return } } panic("unreachable") } +func (d *decoder) flush() { + d.toRead = d.output[:d.o] + d.o = 0 +} + +func (d *decoder) Close() os.Error { + d.err = os.EINVAL // in case any Reads come along + return nil +} + // NewReader creates a new io.ReadCloser that satisfies reads by decompressing // the data read from r. // It is the caller's responsibility to call Close on the ReadCloser when @@ -190,21 +223,31 @@ func decode1(pw *io.PipeWriter, r io.ByteReader, read func(*decoder) (uint16, os // The number of bits to use for literal codes, litWidth, must be in the // range [2,8] and is typically 8. func NewReader(r io.Reader, order Order, litWidth int) io.ReadCloser { - pr, pw := io.Pipe() - var read func(*decoder) (uint16, os.Error) + d := new(decoder) switch order { case LSB: - read = (*decoder).readLSB + d.read = (*decoder).readLSB case MSB: - read = (*decoder).readMSB + d.read = (*decoder).readMSB default: - pw.CloseWithError(os.NewError("lzw: unknown order")) - return pr + d.err = os.NewError("lzw: unknown order") + return d } if litWidth < 2 || 8 < litWidth { - pw.CloseWithError(fmt.Errorf("lzw: litWidth %d out of range", litWidth)) - return pr + d.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth) + return d } - go decode(r, read, litWidth, pw) - return pr + if br, ok := r.(io.ByteReader); ok { + d.r = br + } else { + d.r = bufio.NewReader(r) + } + d.litWidth = litWidth + d.width = 1 + uint(litWidth) + d.clear = uint16(1) << uint(litWidth) + d.eof, d.hi = d.clear+1, d.clear+1 + d.overflow = uint16(1) << d.width + d.last = decoderInvalidCode + + return d } diff --git a/src/pkg/compress/lzw/writer_test.go b/src/pkg/compress/lzw/writer_test.go index 82464ecd1..4c5e522f9 100644 --- a/src/pkg/compress/lzw/writer_test.go +++ b/src/pkg/compress/lzw/writer_test.go @@ -77,13 +77,13 @@ func testFile(t *testing.T, fn string, order Order, litWidth int) { t.Errorf("%s (order=%d litWidth=%d): %v", fn, order, litWidth, err1) return } - if len(b0) != len(b1) { - t.Errorf("%s (order=%d litWidth=%d): length mismatch %d versus %d", fn, order, litWidth, len(b0), len(b1)) + if len(b1) != len(b0) { + t.Errorf("%s (order=%d litWidth=%d): length mismatch %d != %d", fn, order, litWidth, len(b1), len(b0)) return } for i := 0; i < len(b0); i++ { - if b0[i] != b1[i] { - t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x versus 0x%02x\n", fn, order, litWidth, i, b0[i], b1[i]) + if b1[i] != b0[i] { + t.Errorf("%s (order=%d litWidth=%d): mismatch at %d, 0x%02x != 0x%02x\n", fn, order, litWidth, i, b1[i], b0[i]) return } } diff --git a/src/pkg/compress/zlib/writer.go b/src/pkg/compress/zlib/writer.go index f1f9b2853..8f86e9c4c 100644 --- 
a/src/pkg/compress/zlib/writer.go +++ b/src/pkg/compress/zlib/writer.go @@ -89,7 +89,7 @@ func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, os.Error) { } } z.w = w - z.compressor = flate.NewWriter(w, level) + z.compressor = flate.NewWriterDict(w, level, dict) z.digest = adler32.New() return z, nil } diff --git a/src/pkg/compress/zlib/writer_test.go b/src/pkg/compress/zlib/writer_test.go index f94f28470..32f05ab68 100644 --- a/src/pkg/compress/zlib/writer_test.go +++ b/src/pkg/compress/zlib/writer_test.go @@ -5,6 +5,8 @@ package zlib import ( + "bytes" + "fmt" "io" "io/ioutil" "os" @@ -16,15 +18,13 @@ var filenames = []string{ "../testdata/pi.txt", } +var data = []string{ + "test a reasonable sized string that can be compressed", +} + // Tests that compressing and then decompressing the given file at the given compression level and dictionary // yields equivalent bytes to the original file. func testFileLevelDict(t *testing.T, fn string, level int, d string) { - // Read dictionary, if given. - var dict []byte - if d != "" { - dict = []byte(d) - } - // Read the file, as golden output. golden, err := os.Open(fn) if err != nil { @@ -32,17 +32,25 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { return } defer golden.Close() - - // Read the file again, and push it through a pipe that compresses at the write end, and decompresses at the read end. - raw, err := os.Open(fn) - if err != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + b0, err0 := ioutil.ReadAll(golden) + if err0 != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) return } + testLevelDict(t, fn, b0, level, d) +} + +func testLevelDict(t *testing.T, fn string, b0 []byte, level int, d string) { + // Make dictionary, if given. + var dict []byte + if d != "" { + dict = []byte(d) + } + + // Push data through a pipe that compresses at the write end, and decompresses at the read end. piper, pipew := io.Pipe() defer piper.Close() go func() { - defer raw.Close() defer pipew.Close() zlibw, err := NewWriterDict(pipew, level, dict) if err != nil { @@ -50,25 +58,14 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { return } defer zlibw.Close() - var b [1024]byte - for { - n, err0 := raw.Read(b[0:]) - if err0 != nil && err0 != os.EOF { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) - return - } - _, err1 := zlibw.Write(b[0:n]) - if err1 == os.EPIPE { - // Fail, but do not report the error, as some other (presumably reportable) error broke the pipe. - return - } - if err1 != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) - return - } - if err0 == os.EOF { - break - } + _, err = zlibw.Write(b0) + if err == os.EPIPE { + // Fail, but do not report the error, as some other (presumably reported) error broke the pipe. + return + } + if err != nil { + t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err) + return } }() zlibr, err := NewReaderDict(piper, dict) @@ -78,13 +75,8 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { } defer zlibr.Close() - // Compare the two. - b0, err0 := ioutil.ReadAll(golden) + // Compare the decompressed data. 
b1, err1 := ioutil.ReadAll(zlibr) - if err0 != nil { - t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err0) - return - } if err1 != nil { t.Errorf("%s (level=%d, dict=%q): %v", fn, level, d, err1) return @@ -102,6 +94,18 @@ func testFileLevelDict(t *testing.T, fn string, level int, d string) { } func TestWriter(t *testing.T) { + for i, s := range data { + b := []byte(s) + tag := fmt.Sprintf("#%d", i) + testLevelDict(t, tag, b, DefaultCompression, "") + testLevelDict(t, tag, b, NoCompression, "") + for level := BestSpeed; level <= BestCompression; level++ { + testLevelDict(t, tag, b, level, "") + } + } +} + +func TestWriterBig(t *testing.T) { for _, fn := range filenames { testFileLevelDict(t, fn, DefaultCompression, "") testFileLevelDict(t, fn, NoCompression, "") @@ -121,3 +125,20 @@ func TestWriterDict(t *testing.T) { } } } + +func TestWriterDictIsUsed(t *testing.T) { + var input = []byte("Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.") + buf := bytes.NewBuffer(nil) + compressor, err := NewWriterDict(buf, BestCompression, input) + if err != nil { + t.Errorf("error in NewWriterDict: %s", err) + return + } + compressor.Write(input) + compressor.Close() + const expectedMaxSize = 25 + output := buf.Bytes() + if len(output) > expectedMaxSize { + t.Errorf("result too large (got %d, want <= %d bytes). Is the dictionary being used?", len(output), expectedMaxSize) + } +} |
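TestWriterDictIsUsed only asserts on compressed size; the effect is easy to see directly. Priming both ends with the same preset dictionary lets even the first occurrence of dictionary text be emitted as back-references, which is also why NewWriterDict must hand the dictionary through to flate.NewWriterDict (the one-line writer.go fix above). A sketch using the NewWriterDict/NewReaderDict APIs exercised in these tests (compression level spelled as the literal 9, i.e. best compression, to stay within what this diff shows):

package main

import (
	"bytes"
	"compress/zlib"
	"fmt"
	"io/ioutil"
)

func main() {
	dict := []byte("Lorem ipsum dolor sit amet")
	var buf bytes.Buffer

	zw, err := zlib.NewWriterDict(&buf, 9, dict)
	if err != nil {
		fmt.Println(err)
		return
	}
	zw.Write(dict) // all of this is in the dictionary, so it compresses to almost nothing
	zw.Close()
	fmt.Println("compressed size:", buf.Len())

	// The reader must be given the same dictionary to decode the stream.
	zr, err := zlib.NewReaderDict(&buf, dict)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer zr.Close()
	out, _ := ioutil.ReadAll(zr)
	fmt.Printf("%s\n", out)
}
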