diff options
Diffstat (limited to 'src/pkg/image/jpeg')
-rw-r--r-- | src/pkg/image/jpeg/dct_test.go | 299 | ||||
-rw-r--r-- | src/pkg/image/jpeg/huffman.go | 58 | ||||
-rw-r--r-- | src/pkg/image/jpeg/idct.go | 80 | ||||
-rw-r--r-- | src/pkg/image/jpeg/reader.go | 223 | ||||
-rw-r--r-- | src/pkg/image/jpeg/reader_test.go | 157 | ||||
-rw-r--r-- | src/pkg/image/jpeg/scan.go | 432 | ||||
-rw-r--r-- | src/pkg/image/jpeg/writer.go | 100 | ||||
-rw-r--r-- | src/pkg/image/jpeg/writer_test.go | 93 |
8 files changed, 1151 insertions, 291 deletions
diff --git a/src/pkg/image/jpeg/dct_test.go b/src/pkg/image/jpeg/dct_test.go new file mode 100644 index 000000000..7389f7e4f --- /dev/null +++ b/src/pkg/image/jpeg/dct_test.go @@ -0,0 +1,299 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jpeg + +import ( + "bytes" + "fmt" + "math" + "math/rand" + "testing" +) + +func benchmarkDCT(b *testing.B, f func(*block)) { + b.StopTimer() + blocks := make([]block, 0, b.N*len(testBlocks)) + for i := 0; i < b.N; i++ { + blocks = append(blocks, testBlocks[:]...) + } + b.StartTimer() + for i := range blocks { + f(&blocks[i]) + } +} + +func BenchmarkFDCT(b *testing.B) { + benchmarkDCT(b, fdct) +} + +func BenchmarkIDCT(b *testing.B) { + benchmarkDCT(b, idct) +} + +func TestDCT(t *testing.T) { + blocks := make([]block, len(testBlocks)) + copy(blocks, testBlocks[:]) + + // Append some randomly generated blocks of varying sparseness. + r := rand.New(rand.NewSource(123)) + for i := 0; i < 100; i++ { + b := block{} + n := r.Int() % 64 + for j := 0; j < n; j++ { + b[r.Int()%len(b)] = r.Int31() % 256 + } + blocks = append(blocks, b) + } + + // Check that the FDCT and IDCT functions are inverses, after a scale and + // level shift. Scaling reduces the rounding errors in the conversion from + // floats to ints. + for i, b := range blocks { + got, want := b, b + for j := range got { + got[j] = (got[j] - 128) * 8 + } + slowFDCT(&got) + slowIDCT(&got) + for j := range got { + got[j] = got[j]/8 + 128 + } + if differ(&got, &want) { + t.Errorf("i=%d: IDCT(FDCT)\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } + + // Check that the optimized and slow FDCT implementations agree. + // The fdct function already does a scale and level shift. + for i, b := range blocks { + got, want := b, b + fdct(&got) + for j := range want { + want[j] = (want[j] - 128) * 8 + } + slowFDCT(&want) + if differ(&got, &want) { + t.Errorf("i=%d: FDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } + + // Check that the optimized and slow IDCT implementations agree. + for i, b := range blocks { + got, want := b, b + idct(&got) + slowIDCT(&want) + if differ(&got, &want) { + t.Errorf("i=%d: IDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want) + } + } +} + +// differ returns whether any pair-wise elements in b0 and b1 differ by 2 or +// more. That tolerance is because there isn't a single definitive decoding of +// a given JPEG image, even before the YCbCr to RGB conversion; implementations +// can have different IDCT rounding errors. +func differ(b0, b1 *block) bool { + for i := range b0 { + delta := b0[i] - b1[i] + if delta < -2 || +2 < delta { + return true + } + } + return false +} + +// alpha returns 1 if i is 0 and returns √2 otherwise. +func alpha(i int) float64 { + if i == 0 { + return 1 + } + return math.Sqrt2 +} + +var cosines [32]float64 // cosines[k] = cos(π/2 * k/8) + +func init() { + for k := range cosines { + cosines[k] = math.Cos(math.Pi * float64(k) / 16) + } +} + +// slowFDCT performs the 8*8 2-dimensional forward discrete cosine transform: +// +// dst[u,v] = (1/8) * Σ_x Σ_y alpha(u) * alpha(v) * src[x,y] * +// cos((π/2) * (2*x + 1) * u / 8) * +// cos((π/2) * (2*y + 1) * v / 8) +// +// x and y are in pixel space, and u and v are in transform space. +// +// b acts as both dst and src. +func slowFDCT(b *block) { + var dst [blockSize]float64 + for v := 0; v < 8; v++ { + for u := 0; u < 8; u++ { + sum := 0.0 + for y := 0; y < 8; y++ { + for x := 0; x < 8; x++ { + sum += alpha(u) * alpha(v) * float64(b[8*y+x]) * + cosines[((2*x+1)*u)%32] * + cosines[((2*y+1)*v)%32] + } + } + dst[8*v+u] = sum / 8 + } + } + // Convert from float64 to int32. + for i := range dst { + b[i] = int32(dst[i] + 0.5) + } +} + +// slowIDCT performs the 8*8 2-dimensional inverse discrete cosine transform: +// +// dst[x,y] = (1/8) * Σ_u Σ_v alpha(u) * alpha(v) * src[u,v] * +// cos((π/2) * (2*x + 1) * u / 8) * +// cos((π/2) * (2*y + 1) * v / 8) +// +// x and y are in pixel space, and u and v are in transform space. +// +// b acts as both dst and src. +func slowIDCT(b *block) { + var dst [blockSize]float64 + for y := 0; y < 8; y++ { + for x := 0; x < 8; x++ { + sum := 0.0 + for v := 0; v < 8; v++ { + for u := 0; u < 8; u++ { + sum += alpha(u) * alpha(v) * float64(b[8*v+u]) * + cosines[((2*x+1)*u)%32] * + cosines[((2*y+1)*v)%32] + } + } + dst[8*y+x] = sum / 8 + } + } + // Convert from float64 to int32. + for i := range dst { + b[i] = int32(dst[i] + 0.5) + } +} + +func (b *block) String() string { + s := bytes.NewBuffer(nil) + fmt.Fprintf(s, "{\n") + for y := 0; y < 8; y++ { + fmt.Fprintf(s, "\t") + for x := 0; x < 8; x++ { + fmt.Fprintf(s, "0x%04x, ", uint16(b[8*y+x])) + } + fmt.Fprintln(s) + } + fmt.Fprintf(s, "}") + return s.String() +} + +// testBlocks are the first 10 pre-IDCT blocks from ../testdata/video-001.jpeg. +var testBlocks = [10]block{ + { + 0x7f, 0xf6, 0x01, 0x07, 0xff, 0x00, 0x00, 0x00, + 0xf5, 0x01, 0xfa, 0x01, 0xfe, 0x00, 0x01, 0x00, + 0x05, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0xff, 0xf8, 0x00, 0x01, 0xff, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, 0x00, 0xff, 0xff, 0x00, + 0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x01, 0xff, 0x01, 0x00, 0xfe, + }, + { + 0x29, 0x07, 0x00, 0xfc, 0x01, 0x01, 0x00, 0x00, + 0x07, 0x00, 0x03, 0x00, 0x01, 0x00, 0xff, 0xff, + 0xff, 0xfd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, 0xff, 0x01, 0x00, 0x00, + 0x01, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x01, 0xfa, 0x01, 0x00, 0x01, 0x00, 0x01, 0xff, + 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x02, + }, + { + 0xc5, 0xfa, 0x01, 0x00, 0x00, 0x01, 0x00, 0xff, + 0x02, 0xff, 0x01, 0x00, 0x01, 0x00, 0xff, 0x00, + 0xff, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, + 0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + { + 0x86, 0x05, 0x00, 0x02, 0x00, 0x00, 0x01, 0x00, + 0xf2, 0x06, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, + 0xf6, 0xfa, 0xf9, 0x00, 0xff, 0x01, 0x00, 0x00, + 0xf9, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00, + 0xff, 0x00, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x01, 0xff, 0x01, 0x00, 0xff, 0x00, 0x00, + }, + { + 0x24, 0xfe, 0x00, 0xff, 0x00, 0xff, 0xff, 0x00, + 0x08, 0xfd, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00, + 0x06, 0x03, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x01, 0xff, 0x00, 0x01, 0x00, 0x00, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x01, + }, + { + 0xcd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, + 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xff, + }, + { + 0x81, 0xfe, 0x05, 0xff, 0x01, 0xff, 0x01, 0x00, + 0xef, 0xf9, 0x00, 0xf9, 0x00, 0xff, 0x00, 0xff, + 0x05, 0xf9, 0x00, 0xf8, 0x01, 0xff, 0x01, 0xff, + 0x00, 0xff, 0x07, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x01, + 0xff, 0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff, + }, + { + 0x28, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0b, 0x02, 0x01, 0x03, 0x00, 0xff, 0x00, 0x01, + 0xfe, 0x02, 0x01, 0x03, 0xff, 0x00, 0x00, 0x00, + 0x01, 0x00, 0xfd, 0x00, 0x01, 0x00, 0xff, 0x00, + 0x01, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xff, 0x01, 0x01, 0x00, 0xff, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x01, + }, + { + 0xdf, 0xf9, 0xfe, 0x00, 0x03, 0x01, 0xff, 0xff, + 0x04, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0xff, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xfe, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + 0x00, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, + }, + { + 0x88, 0xfd, 0x00, 0x00, 0xff, 0x00, 0x01, 0xff, + 0xe1, 0x06, 0x06, 0x01, 0xff, 0x00, 0x01, 0x00, + 0x08, 0x00, 0xfa, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x08, 0x01, 0x00, 0xff, 0x01, 0xff, 0x00, 0x00, + 0xf5, 0xff, 0x00, 0x01, 0xff, 0x01, 0x01, 0x00, + 0xff, 0xff, 0x01, 0xff, 0x01, 0x00, 0x01, 0x00, + 0x00, 0x01, 0x01, 0xff, 0x00, 0xff, 0x00, 0x01, + 0x02, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x00, + }, +} diff --git a/src/pkg/image/jpeg/huffman.go b/src/pkg/image/jpeg/huffman.go index d2382490f..9b731fdc4 100644 --- a/src/pkg/image/jpeg/huffman.go +++ b/src/pkg/image/jpeg/huffman.go @@ -15,9 +15,9 @@ const maxNumValues = 256 // Bit stream for the Huffman decoder. // The n least significant bits of a form the unread bits, to be read in MSB to LSB order. type bits struct { - a int // accumulator. - n int // the number of unread bits in a. - m int // mask. m==1<<(n-1) when n>0, with m==0 when n==0. + a uint32 // accumulator. + m uint32 // mask. m==1<<(n-1) when n>0, with m==0 when n==0. + n int // the number of unread bits in a. } // Huffman table decoder, specified in section C. @@ -39,7 +39,7 @@ func (d *decoder) ensureNBits(n int) error { if err != nil { return err } - d.b.a = d.b.a<<8 | int(c) + d.b.a = d.b.a<<8 | uint32(c) d.b.n += 8 if d.b.m == 0 { d.b.m = 1 << 7 @@ -61,15 +61,16 @@ func (d *decoder) ensureNBits(n int) error { } // The composition of RECEIVE and EXTEND, specified in section F.2.2.1. -func (d *decoder) receiveExtend(t uint8) (int, error) { - err := d.ensureNBits(int(t)) - if err != nil { - return 0, err +func (d *decoder) receiveExtend(t uint8) (int32, error) { + if d.b.n < int(t) { + if err := d.ensureNBits(int(t)); err != nil { + return 0, err + } } d.b.n -= int(t) d.b.m >>= t - s := 1 << t - x := (d.b.a >> uint8(d.b.n)) & (s - 1) + s := int32(1) << t + x := int32(d.b.a>>uint8(d.b.n)) & (s - 1) if x < s>>1 { x += ((-1) << t) + 1 } @@ -92,8 +93,7 @@ func (d *decoder) processDHT(n int) error { return FormatError("bad Tc value") } th := d.tmp[0] & 0x0f - const isBaseline = true // Progressive mode is not yet supported. - if th > maxTh || isBaseline && th > 1 { + if th > maxTh || !d.progressive && th > 1 { return FormatError("bad Th value") } h := &d.huff[tc][th] @@ -163,15 +163,16 @@ func (d *decoder) processDHT(n int) error { // Returns the next Huffman-coded value from the bit stream, decoded according to h. // TODO(nigeltao): This decoding algorithm is simple, but slow. A lookahead table, instead of always -// peeling off only 1 bit at at time, ought to be faster. +// peeling off only 1 bit at time, ought to be faster. func (d *decoder) decodeHuffman(h *huffman) (uint8, error) { if h.length == 0 { return 0, FormatError("uninitialized Huffman table") } for i, code := 0, 0; i < maxCodeLength; i++ { - err := d.ensureNBits(1) - if err != nil { - return 0, err + if d.b.n == 0 { + if err := d.ensureNBits(1); err != nil { + return 0, err + } } if d.b.a&d.b.m != 0 { code |= 1 @@ -185,3 +186,28 @@ func (d *decoder) decodeHuffman(h *huffman) (uint8, error) { } return 0, FormatError("bad Huffman code") } + +func (d *decoder) decodeBit() (bool, error) { + if d.b.n == 0 { + if err := d.ensureNBits(1); err != nil { + return false, err + } + } + ret := d.b.a&d.b.m != 0 + d.b.n-- + d.b.m >>= 1 + return ret, nil +} + +func (d *decoder) decodeBits(n int) (uint32, error) { + if d.b.n < n { + if err := d.ensureNBits(n); err != nil { + return 0, err + } + } + ret := d.b.a >> uint(d.b.n-n) + ret &= (1 << uint(n)) - 1 + d.b.n -= n + d.b.m >>= uint(n) + return ret, nil +} diff --git a/src/pkg/image/jpeg/idct.go b/src/pkg/image/jpeg/idct.go index b387dfdff..46fcaecb7 100644 --- a/src/pkg/image/jpeg/idct.go +++ b/src/pkg/image/jpeg/idct.go @@ -37,6 +37,10 @@ package jpeg * */ +const blockSize = 64 // A DCT block is 8x8. + +type block [blockSize]int32 + const ( w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16) w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16) @@ -55,9 +59,7 @@ const ( r2 = 181 // 256/sqrt(2) ) -// idct performs a 2-D Inverse Discrete Cosine Transformation, followed by a -// +128 level shift and a clip to [0, 255], writing the results to dst. -// stride is the number of elements between successive rows of dst. +// idct performs a 2-D Inverse Discrete Cosine Transformation. // // The input coefficients should already have been multiplied by the // appropriate quantization table. We use fixed-point computation, with the @@ -67,33 +69,34 @@ const ( // For more on the actual algorithm, see Z. Wang, "Fast algorithms for the // discrete W transform and for the discrete Fourier transform", IEEE Trans. on // ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984. -func idct(dst []byte, stride int, src *block) { +func idct(src *block) { // Horizontal 1-D IDCT. for y := 0; y < 8; y++ { + y8 := y * 8 // If all the AC components are zero, then the IDCT is trivial. - if src[y*8+1] == 0 && src[y*8+2] == 0 && src[y*8+3] == 0 && - src[y*8+4] == 0 && src[y*8+5] == 0 && src[y*8+6] == 0 && src[y*8+7] == 0 { - dc := src[y*8+0] << 3 - src[y*8+0] = dc - src[y*8+1] = dc - src[y*8+2] = dc - src[y*8+3] = dc - src[y*8+4] = dc - src[y*8+5] = dc - src[y*8+6] = dc - src[y*8+7] = dc + if src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 && + src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0 { + dc := src[y8+0] << 3 + src[y8+0] = dc + src[y8+1] = dc + src[y8+2] = dc + src[y8+3] = dc + src[y8+4] = dc + src[y8+5] = dc + src[y8+6] = dc + src[y8+7] = dc continue } // Prescale. - x0 := (src[y*8+0] << 11) + 128 - x1 := src[y*8+4] << 11 - x2 := src[y*8+6] - x3 := src[y*8+2] - x4 := src[y*8+1] - x5 := src[y*8+7] - x6 := src[y*8+5] - x7 := src[y*8+3] + x0 := (src[y8+0] << 11) + 128 + x1 := src[y8+4] << 11 + x2 := src[y8+6] + x3 := src[y8+2] + x4 := src[y8+1] + x5 := src[y8+7] + x6 := src[y8+5] + x7 := src[y8+3] // Stage 1. x8 := w7 * (x4 + x5) @@ -123,14 +126,14 @@ func idct(dst []byte, stride int, src *block) { x4 = (r2*(x4-x5) + 128) >> 8 // Stage 4. - src[8*y+0] = (x7 + x1) >> 8 - src[8*y+1] = (x3 + x2) >> 8 - src[8*y+2] = (x0 + x4) >> 8 - src[8*y+3] = (x8 + x6) >> 8 - src[8*y+4] = (x8 - x6) >> 8 - src[8*y+5] = (x0 - x4) >> 8 - src[8*y+6] = (x3 - x2) >> 8 - src[8*y+7] = (x7 - x1) >> 8 + src[y8+0] = (x7 + x1) >> 8 + src[y8+1] = (x3 + x2) >> 8 + src[y8+2] = (x0 + x4) >> 8 + src[y8+3] = (x8 + x6) >> 8 + src[y8+4] = (x8 - x6) >> 8 + src[y8+5] = (x0 - x4) >> 8 + src[y8+6] = (x3 - x2) >> 8 + src[y8+7] = (x7 - x1) >> 8 } // Vertical 1-D IDCT. @@ -186,19 +189,4 @@ func idct(dst []byte, stride int, src *block) { src[8*6+x] = (y3 - y2) >> 14 src[8*7+x] = (y7 - y1) >> 14 } - - // Level shift by +128, clip to [0, 255], and write to dst. - for y := 0; y < 8; y++ { - for x := 0; x < 8; x++ { - c := src[y*8+x] - if c < -128 { - c = 0 - } else if c > 127 { - c = 255 - } else { - c += 128 - } - dst[y*stride+x] = uint8(c) - } - } } diff --git a/src/pkg/image/jpeg/reader.go b/src/pkg/image/jpeg/reader.go index d9adf6e58..1ee6bbcd1 100644 --- a/src/pkg/image/jpeg/reader.go +++ b/src/pkg/image/jpeg/reader.go @@ -35,11 +35,7 @@ type component struct { tq uint8 // Quantization table destination selector. } -type block [blockSize]int - const ( - blockSize = 64 // A DCT block is 8x8. - dcTable = 0 acTable = 1 maxTc = 1 @@ -51,7 +47,7 @@ const ( // A color JPEG image has Y, Cb and Cr components. nColorComponent = 3 - // We only support 4:4:4, 4:2:2 and 4:2:0 downsampling, and therefore the + // We only support 4:4:4, 4:4:0, 4:2:2 and 4:2:0 downsampling, and therefore the // number of luma samples per chroma sample is at most 2 in the horizontal // and 2 in the vertical direction. maxH = 2 @@ -74,7 +70,9 @@ const ( comMarker = 0xfe // COMment. ) -// Maps from the zig-zag ordering to the natural ordering. +// unzig maps from the zig-zag ordering to the natural ordering. For example, +// unzig[3] is the column and row of the fourth element in zig-zag order. The +// value is 16, which means first column (16%8 == 0) and third row (16/8 == 2). var unzig = [blockSize]int{ 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, @@ -94,15 +92,18 @@ type Reader interface { type decoder struct { r Reader + b bits width, height int img1 *image.Gray img3 *image.YCbCr ri int // Restart Interval. nComp int + progressive bool + eobRun uint16 // End-of-Band run, specified in section G.1.2.2. comp [nColorComponent]component + progCoeffs [nColorComponent][]block // Saved state between progressive-mode scans. huff [maxTc + 1][maxTh + 1]huffman - quant [maxTq + 1]block - b bits + quant [maxTq + 1]block // Quantization tables, in zig-zag order. tmp [1024]byte } @@ -146,20 +147,33 @@ func (d *decoder) processSOF(n int) error { return UnsupportedError("SOF has wrong number of image components") } for i := 0; i < d.nComp; i++ { - hv := d.tmp[7+3*i] - d.comp[i].h = int(hv >> 4) - d.comp[i].v = int(hv & 0x0f) d.comp[i].c = d.tmp[6+3*i] d.comp[i].tq = d.tmp[8+3*i] if d.nComp == nGrayComponent { + // If a JPEG image has only one component, section A.2 says "this data + // is non-interleaved by definition" and section A.2.2 says "[in this + // case...] the order of data units within a scan shall be left-to-right + // and top-to-bottom... regardless of the values of H_1 and V_1". Section + // 4.8.2 also says "[for non-interleaved data], the MCU is defined to be + // one data unit". Similarly, section A.1.1 explains that it is the ratio + // of H_i to max_j(H_j) that matters, and similarly for V. For grayscale + // images, H_1 is the maximum H_j for all components j, so that ratio is + // always 1. The component's (h, v) is effectively always (1, 1): even if + // the nominal (h, v) is (2, 1), a 20x5 image is encoded in three 8x8 + // MCUs, not two 16x8 MCUs. + d.comp[i].h = 1 + d.comp[i].v = 1 continue } - // For color images, we only support 4:4:4, 4:2:2 or 4:2:0 chroma + hv := d.tmp[7+3*i] + d.comp[i].h = int(hv >> 4) + d.comp[i].v = int(hv & 0x0f) + // For color images, we only support 4:4:4, 4:4:0, 4:2:2 or 4:2:0 chroma // downsampling ratios. This implies that the (h, v) values for the Y - // component are either (1, 1), (2, 1) or (2, 2), and the (h, v) + // component are either (1, 1), (1, 2), (2, 1) or (2, 2), and the (h, v) // values for the Cr and Cb components must be (1, 1). if i == 0 { - if hv != 0x11 && hv != 0x21 && hv != 0x22 { + if hv != 0x11 && hv != 0x21 && hv != 0x22 && hv != 0x12 { return UnsupportedError("luma downsample ratio") } } else if hv != 0x11 { @@ -186,7 +200,7 @@ func (d *decoder) processDQT(n int) error { return FormatError("bad Tq value") } for i := range d.quant[tq] { - d.quant[tq][i] = int(d.tmp[i+1]) + d.quant[tq][i] = int32(d.tmp[i+1]) } } if n != 0 { @@ -195,161 +209,6 @@ func (d *decoder) processDQT(n int) error { return nil } -// makeImg allocates and initializes the destination image. -func (d *decoder) makeImg(h0, v0, mxx, myy int) { - if d.nComp == nGrayComponent { - m := image.NewGray(image.Rect(0, 0, 8*mxx, 8*myy)) - d.img1 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.Gray) - return - } - var subsampleRatio image.YCbCrSubsampleRatio - switch h0 * v0 { - case 1: - subsampleRatio = image.YCbCrSubsampleRatio444 - case 2: - subsampleRatio = image.YCbCrSubsampleRatio422 - case 4: - subsampleRatio = image.YCbCrSubsampleRatio420 - default: - panic("unreachable") - } - m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio) - d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr) -} - -// Specified in section B.2.3. -func (d *decoder) processSOS(n int) error { - if d.nComp == 0 { - return FormatError("missing SOF marker") - } - if n != 4+2*d.nComp { - return UnsupportedError("SOS has wrong length") - } - _, err := io.ReadFull(d.r, d.tmp[0:4+2*d.nComp]) - if err != nil { - return err - } - if int(d.tmp[0]) != d.nComp { - return UnsupportedError("SOS has wrong number of image components") - } - var scan [nColorComponent]struct { - td uint8 // DC table selector. - ta uint8 // AC table selector. - } - for i := 0; i < d.nComp; i++ { - cs := d.tmp[1+2*i] // Component selector. - if cs != d.comp[i].c { - return UnsupportedError("scan components out of order") - } - scan[i].td = d.tmp[2+2*i] >> 4 - scan[i].ta = d.tmp[2+2*i] & 0x0f - } - // mxx and myy are the number of MCUs (Minimum Coded Units) in the image. - h0, v0 := d.comp[0].h, d.comp[0].v // The h and v values from the Y components. - mxx := (d.width + 8*h0 - 1) / (8 * h0) - myy := (d.height + 8*v0 - 1) / (8 * v0) - if d.img1 == nil && d.img3 == nil { - d.makeImg(h0, v0, mxx, myy) - } - - mcu, expectedRST := 0, uint8(rst0Marker) - var ( - b block - dc [nColorComponent]int - ) - for my := 0; my < myy; my++ { - for mx := 0; mx < mxx; mx++ { - for i := 0; i < d.nComp; i++ { - qt := &d.quant[d.comp[i].tq] - for j := 0; j < d.comp[i].h*d.comp[i].v; j++ { - // TODO(nigeltao): make this a "var b block" once the compiler's escape - // analysis is good enough to allocate it on the stack, not the heap. - b = block{} - - // Decode the DC coefficient, as specified in section F.2.2.1. - value, err := d.decodeHuffman(&d.huff[dcTable][scan[i].td]) - if err != nil { - return err - } - if value > 16 { - return UnsupportedError("excessive DC component") - } - dcDelta, err := d.receiveExtend(value) - if err != nil { - return err - } - dc[i] += dcDelta - b[0] = dc[i] * qt[0] - - // Decode the AC coefficients, as specified in section F.2.2.2. - for k := 1; k < blockSize; k++ { - value, err := d.decodeHuffman(&d.huff[acTable][scan[i].ta]) - if err != nil { - return err - } - val0 := value >> 4 - val1 := value & 0x0f - if val1 != 0 { - k += int(val0) - if k > blockSize { - return FormatError("bad DCT index") - } - ac, err := d.receiveExtend(val1) - if err != nil { - return err - } - b[unzig[k]] = ac * qt[k] - } else { - if val0 != 0x0f { - break - } - k += 0x0f - } - } - - // Perform the inverse DCT and store the MCU component to the image. - if d.nComp == nGrayComponent { - idct(d.img1.Pix[8*(my*d.img1.Stride+mx):], d.img1.Stride, &b) - } else { - switch i { - case 0: - mx0 := h0*mx + (j % 2) - my0 := v0*my + (j / 2) - idct(d.img3.Y[8*(my0*d.img3.YStride+mx0):], d.img3.YStride, &b) - case 1: - idct(d.img3.Cb[8*(my*d.img3.CStride+mx):], d.img3.CStride, &b) - case 2: - idct(d.img3.Cr[8*(my*d.img3.CStride+mx):], d.img3.CStride, &b) - } - } - } // for j - } // for i - mcu++ - if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy { - // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input, - // but this one assumes well-formed input, and hence the restart marker follows immediately. - _, err := io.ReadFull(d.r, d.tmp[0:2]) - if err != nil { - return err - } - if d.tmp[0] != 0xff || d.tmp[1] != expectedRST { - return FormatError("bad RST marker") - } - expectedRST++ - if expectedRST == rst7Marker+1 { - expectedRST = rst0Marker - } - // Reset the Huffman decoder. - d.b = bits{} - // Reset the DC components, as per section F.2.1.3.1. - dc = [nColorComponent]int{} - } - } // for mx - } // for my - - return nil -} - // Specified in section B.2.4.4. func (d *decoder) processDRI(n int) error { if n != 2 { @@ -390,9 +249,26 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) { return nil, FormatError("missing 0xff marker start") } marker := d.tmp[1] + for marker == 0xff { + // Section B.1.1.2 says, "Any marker may optionally be preceded by any + // number of fill bytes, which are bytes assigned code X'FF'". + marker, err = d.r.ReadByte() + if err != nil { + return nil, err + } + } if marker == eoiMarker { // End Of Image. break } + if rst0Marker <= marker && marker <= rst7Marker { + // Figures B.2 and B.16 of the specification suggest that restart markers should + // only occur between Entropy Coded Segments and not after the final ECS. + // However, some encoders may generate incorrect JPEGs with a final restart + // marker. That restart marker will be seen here instead of inside the processSOS + // method, and is ignored as a harmless error. Restart markers have no extra data, + // so we check for this before we read the 16-bit length of the segment. + continue + } // Read the 16-bit length of the segment. The value includes the 2 bytes for the // length itself, so we subtract 2 to get the number of remaining bytes. @@ -406,13 +282,12 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) { } switch { - case marker == sof0Marker: // Start Of Frame (Baseline). + case marker == sof0Marker || marker == sof2Marker: // Start Of Frame. + d.progressive = marker == sof2Marker err = d.processSOF(n) if configOnly { return nil, err } - case marker == sof2Marker: // Start Of Frame (Progressive). - err = UnsupportedError("progressive mode") case marker == dhtMarker: // Define Huffman Table. err = d.processDHT(n) case marker == dqtMarker: // Define Quantization Table. @@ -421,7 +296,7 @@ func (d *decoder) decode(r io.Reader, configOnly bool) (image.Image, error) { err = d.processSOS(n) case marker == driMarker: // Define Restart Interval. err = d.processDRI(n) - case marker >= app0Marker && marker <= app15Marker || marker == comMarker: // APPlication specific, or COMment. + case app0Marker <= marker && marker <= app15Marker || marker == comMarker: // APPlication specific, or COMment. err = d.ignore(n) default: err = UnsupportedError("unknown marker") diff --git a/src/pkg/image/jpeg/reader_test.go b/src/pkg/image/jpeg/reader_test.go new file mode 100644 index 000000000..b520a8ab1 --- /dev/null +++ b/src/pkg/image/jpeg/reader_test.go @@ -0,0 +1,157 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jpeg + +import ( + "bytes" + "fmt" + "image" + "io/ioutil" + "os" + "testing" +) + +// TestDecodeProgressive tests that decoding the baseline and progressive +// versions of the same image result in exactly the same pixel data, in YCbCr +// space for color images, and Y space for grayscale images. +func TestDecodeProgressive(t *testing.T) { + testCases := []string{ + "../testdata/video-001", + "../testdata/video-001.q50.420", + "../testdata/video-001.q50.422", + "../testdata/video-001.q50.440", + "../testdata/video-001.q50.444", + "../testdata/video-005.gray.q50", + "../testdata/video-005.gray.q50.2x2", + } + for _, tc := range testCases { + m0, err := decodeFile(tc + ".jpeg") + if err != nil { + t.Errorf("%s: %v", tc+".jpeg", err) + continue + } + m1, err := decodeFile(tc + ".progressive.jpeg") + if err != nil { + t.Errorf("%s: %v", tc+".progressive.jpeg", err) + continue + } + if m0.Bounds() != m1.Bounds() { + t.Errorf("%s: bounds differ: %v and %v", tc, m0.Bounds(), m1.Bounds()) + continue + } + switch m0 := m0.(type) { + case *image.YCbCr: + m1 := m1.(*image.YCbCr) + if err := check(m0.Bounds(), m0.Y, m1.Y, m0.YStride, m1.YStride); err != nil { + t.Errorf("%s (Y): %v", tc, err) + continue + } + if err := check(m0.Bounds(), m0.Cb, m1.Cb, m0.CStride, m1.CStride); err != nil { + t.Errorf("%s (Cb): %v", tc, err) + continue + } + if err := check(m0.Bounds(), m0.Cr, m1.Cr, m0.CStride, m1.CStride); err != nil { + t.Errorf("%s (Cr): %v", tc, err) + continue + } + case *image.Gray: + m1 := m1.(*image.Gray) + if err := check(m0.Bounds(), m0.Pix, m1.Pix, m0.Stride, m1.Stride); err != nil { + t.Errorf("%s: %v", tc, err) + continue + } + default: + t.Errorf("%s: unexpected image type %T", tc, m0) + continue + } + } +} + +func decodeFile(filename string) (image.Image, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + return Decode(f) + +} + +// check checks that the two pix data are equal, within the given bounds. +func check(bounds image.Rectangle, pix0, pix1 []byte, stride0, stride1 int) error { + if len(pix0) != len(pix1) { + return fmt.Errorf("len(pix) %d and %d differ", len(pix0), len(pix1)) + } + if stride0 != stride1 { + return fmt.Errorf("strides %d and %d differ", stride0, stride1) + } + if stride0%8 != 0 { + return fmt.Errorf("stride %d is not a multiple of 8", stride0) + } + // Compare the two pix data, one 8x8 block at a time. + for y := 0; y < len(pix0)/stride0; y += 8 { + for x := 0; x < stride0; x += 8 { + if x >= bounds.Max.X || y >= bounds.Max.Y { + // We don't care if the two pix data differ if the 8x8 block is + // entirely outside of the image's bounds. For example, this can + // occur with a 4:2:0 chroma subsampling and a 1x1 image. Baseline + // decoding works on the one 16x16 MCU as a whole; progressive + // decoding's first pass works on that 16x16 MCU as a whole but + // refinement passes only process one 8x8 block within the MCU. + continue + } + + for j := 0; j < 8; j++ { + for i := 0; i < 8; i++ { + index := (y+j)*stride0 + (x + i) + if pix0[index] != pix1[index] { + return fmt.Errorf("blocks at (%d, %d) differ:\n%sand\n%s", x, y, + pixString(pix0, stride0, x, y), + pixString(pix1, stride1, x, y), + ) + } + } + } + } + } + return nil +} + +func pixString(pix []byte, stride, x, y int) string { + s := bytes.NewBuffer(nil) + for j := 0; j < 8; j++ { + fmt.Fprintf(s, "\t") + for i := 0; i < 8; i++ { + fmt.Fprintf(s, "%02x ", pix[(y+j)*stride+(x+i)]) + } + fmt.Fprintf(s, "\n") + } + return s.String() +} + +func benchmarkDecode(b *testing.B, filename string) { + b.StopTimer() + data, err := ioutil.ReadFile(filename) + if err != nil { + b.Fatal(err) + } + cfg, err := DecodeConfig(bytes.NewReader(data)) + if err != nil { + b.Fatal(err) + } + b.SetBytes(int64(cfg.Width * cfg.Height * 4)) + b.StartTimer() + for i := 0; i < b.N; i++ { + Decode(bytes.NewReader(data)) + } +} + +func BenchmarkDecodeBaseline(b *testing.B) { + benchmarkDecode(b, "../testdata/video-001.jpeg") +} + +func BenchmarkDecodeProgressive(b *testing.B) { + benchmarkDecode(b, "../testdata/video-001.progressive.jpeg") +} diff --git a/src/pkg/image/jpeg/scan.go b/src/pkg/image/jpeg/scan.go new file mode 100644 index 000000000..e3ae8ae44 --- /dev/null +++ b/src/pkg/image/jpeg/scan.go @@ -0,0 +1,432 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package jpeg + +import ( + "image" + "io" +) + +// makeImg allocates and initializes the destination image. +func (d *decoder) makeImg(h0, v0, mxx, myy int) { + if d.nComp == nGrayComponent { + m := image.NewGray(image.Rect(0, 0, 8*mxx, 8*myy)) + d.img1 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.Gray) + return + } + var subsampleRatio image.YCbCrSubsampleRatio + switch { + case h0 == 1 && v0 == 1: + subsampleRatio = image.YCbCrSubsampleRatio444 + case h0 == 1 && v0 == 2: + subsampleRatio = image.YCbCrSubsampleRatio440 + case h0 == 2 && v0 == 1: + subsampleRatio = image.YCbCrSubsampleRatio422 + case h0 == 2 && v0 == 2: + subsampleRatio = image.YCbCrSubsampleRatio420 + default: + panic("unreachable") + } + m := image.NewYCbCr(image.Rect(0, 0, 8*h0*mxx, 8*v0*myy), subsampleRatio) + d.img3 = m.SubImage(image.Rect(0, 0, d.width, d.height)).(*image.YCbCr) +} + +// Specified in section B.2.3. +func (d *decoder) processSOS(n int) error { + if d.nComp == 0 { + return FormatError("missing SOF marker") + } + if n < 6 || 4+2*d.nComp < n || n%2 != 0 { + return FormatError("SOS has wrong length") + } + _, err := io.ReadFull(d.r, d.tmp[:n]) + if err != nil { + return err + } + nComp := int(d.tmp[0]) + if n != 4+2*nComp { + return FormatError("SOS length inconsistent with number of components") + } + var scan [nColorComponent]struct { + compIndex uint8 + td uint8 // DC table selector. + ta uint8 // AC table selector. + } + for i := 0; i < nComp; i++ { + cs := d.tmp[1+2*i] // Component selector. + compIndex := -1 + for j, comp := range d.comp { + if cs == comp.c { + compIndex = j + } + } + if compIndex < 0 { + return FormatError("unknown component selector") + } + scan[i].compIndex = uint8(compIndex) + scan[i].td = d.tmp[2+2*i] >> 4 + scan[i].ta = d.tmp[2+2*i] & 0x0f + } + + // zigStart and zigEnd are the spectral selection bounds. + // ah and al are the successive approximation high and low values. + // The spec calls these values Ss, Se, Ah and Al. + // + // For progressive JPEGs, these are the two more-or-less independent + // aspects of progression. Spectral selection progression is when not + // all of a block's 64 DCT coefficients are transmitted in one pass. + // For example, three passes could transmit coefficient 0 (the DC + // component), coefficients 1-5, and coefficients 6-63, in zig-zag + // order. Successive approximation is when not all of the bits of a + // band of coefficients are transmitted in one pass. For example, + // three passes could transmit the 6 most significant bits, followed + // by the second-least significant bit, followed by the least + // significant bit. + // + // For baseline JPEGs, these parameters are hard-coded to 0/63/0/0. + zigStart, zigEnd, ah, al := int32(0), int32(blockSize-1), uint32(0), uint32(0) + if d.progressive { + zigStart = int32(d.tmp[1+2*nComp]) + zigEnd = int32(d.tmp[2+2*nComp]) + ah = uint32(d.tmp[3+2*nComp] >> 4) + al = uint32(d.tmp[3+2*nComp] & 0x0f) + if (zigStart == 0 && zigEnd != 0) || zigStart > zigEnd || blockSize <= zigEnd { + return FormatError("bad spectral selection bounds") + } + if zigStart != 0 && nComp != 1 { + return FormatError("progressive AC coefficients for more than one component") + } + if ah != 0 && ah != al+1 { + return FormatError("bad successive approximation values") + } + } + + // mxx and myy are the number of MCUs (Minimum Coded Units) in the image. + h0, v0 := d.comp[0].h, d.comp[0].v // The h and v values from the Y components. + mxx := (d.width + 8*h0 - 1) / (8 * h0) + myy := (d.height + 8*v0 - 1) / (8 * v0) + if d.img1 == nil && d.img3 == nil { + d.makeImg(h0, v0, mxx, myy) + if d.progressive { + for i := 0; i < nComp; i++ { + compIndex := scan[i].compIndex + d.progCoeffs[compIndex] = make([]block, mxx*myy*d.comp[compIndex].h*d.comp[compIndex].v) + } + } + } + + d.b = bits{} + mcu, expectedRST := 0, uint8(rst0Marker) + var ( + // b is the decoded coefficients, in natural (not zig-zag) order. + b block + dc [nColorComponent]int32 + // mx0 and my0 are the location of the current (in terms of 8x8 blocks). + // For example, with 4:2:0 chroma subsampling, the block whose top left + // pixel co-ordinates are (16, 8) is the third block in the first row: + // mx0 is 2 and my0 is 0, even though the pixel is in the second MCU. + // TODO(nigeltao): rename mx0 and my0 to bx and by? + mx0, my0 int + blockCount int + ) + for my := 0; my < myy; my++ { + for mx := 0; mx < mxx; mx++ { + for i := 0; i < nComp; i++ { + compIndex := scan[i].compIndex + qt := &d.quant[d.comp[compIndex].tq] + for j := 0; j < d.comp[compIndex].h*d.comp[compIndex].v; j++ { + // The blocks are traversed one MCU at a time. For 4:2:0 chroma + // subsampling, there are four Y 8x8 blocks in every 16x16 MCU. + // For a baseline 32x16 pixel image, the Y blocks visiting order is: + // 0 1 4 5 + // 2 3 6 7 + // + // For progressive images, the DC data blocks (zigStart == 0) are traversed + // as above, but AC data blocks are traversed left to right, top to bottom: + // 0 1 2 3 + // 4 5 6 7 + // + // To further complicate matters, there is no AC data for any blocks that + // are inside the image at the MCU level but outside the image at the pixel + // level. For example, a 24x16 pixel 4:2:0 progressive image consists of + // two 16x16 MCUs. The earlier scans will process 8 Y blocks: + // 0 1 4 5 + // 2 3 6 7 + // The later scans will process only 6 Y blocks: + // 0 1 2 + // 3 4 5 + if zigStart == 0 { + mx0, my0 = d.comp[compIndex].h*mx, d.comp[compIndex].v*my + if h0 == 1 { + my0 += j + } else { + mx0 += j % 2 + my0 += j / 2 + } + } else { + q := mxx * d.comp[compIndex].h + mx0 = blockCount % q + my0 = blockCount / q + blockCount++ + if mx0*8 >= d.width || my0*8 >= d.height { + continue + } + } + + // Load the previous partially decoded coefficients, if applicable. + if d.progressive { + b = d.progCoeffs[compIndex][my0*mxx*d.comp[compIndex].h+mx0] + } else { + b = block{} + } + + if ah != 0 { + if err := d.refine(&b, &d.huff[acTable][scan[i].ta], zigStart, zigEnd, 1<<al); err != nil { + return err + } + } else { + zig := zigStart + if zig == 0 { + zig++ + // Decode the DC coefficient, as specified in section F.2.2.1. + value, err := d.decodeHuffman(&d.huff[dcTable][scan[i].td]) + if err != nil { + return err + } + if value > 16 { + return UnsupportedError("excessive DC component") + } + dcDelta, err := d.receiveExtend(value) + if err != nil { + return err + } + dc[compIndex] += dcDelta + b[0] = dc[compIndex] << al + } + + if zig <= zigEnd && d.eobRun > 0 { + d.eobRun-- + } else { + // Decode the AC coefficients, as specified in section F.2.2.2. + for ; zig <= zigEnd; zig++ { + value, err := d.decodeHuffman(&d.huff[acTable][scan[i].ta]) + if err != nil { + return err + } + val0 := value >> 4 + val1 := value & 0x0f + if val1 != 0 { + zig += int32(val0) + if zig > zigEnd { + break + } + ac, err := d.receiveExtend(val1) + if err != nil { + return err + } + b[unzig[zig]] = ac << al + } else { + if val0 != 0x0f { + d.eobRun = uint16(1 << val0) + if val0 != 0 { + bits, err := d.decodeBits(int(val0)) + if err != nil { + return err + } + d.eobRun |= uint16(bits) + } + d.eobRun-- + break + } + zig += 0x0f + } + } + } + } + + if d.progressive { + if zigEnd != blockSize-1 || al != 0 { + // We haven't completely decoded this 8x8 block. Save the coefficients. + d.progCoeffs[compIndex][my0*mxx*d.comp[compIndex].h+mx0] = b + // At this point, we could execute the rest of the loop body to dequantize and + // perform the inverse DCT, to save early stages of a progressive image to the + // *image.YCbCr buffers (the whole point of progressive encoding), but in Go, + // the jpeg.Decode function does not return until the entire image is decoded, + // so we "continue" here to avoid wasted computation. + continue + } + } + + // Dequantize, perform the inverse DCT and store the block to the image. + for zig := 0; zig < blockSize; zig++ { + b[unzig[zig]] *= qt[zig] + } + idct(&b) + dst, stride := []byte(nil), 0 + if d.nComp == nGrayComponent { + dst, stride = d.img1.Pix[8*(my0*d.img1.Stride+mx0):], d.img1.Stride + } else { + switch compIndex { + case 0: + dst, stride = d.img3.Y[8*(my0*d.img3.YStride+mx0):], d.img3.YStride + case 1: + dst, stride = d.img3.Cb[8*(my0*d.img3.CStride+mx0):], d.img3.CStride + case 2: + dst, stride = d.img3.Cr[8*(my0*d.img3.CStride+mx0):], d.img3.CStride + default: + return UnsupportedError("too many components") + } + } + // Level shift by +128, clip to [0, 255], and write to dst. + for y := 0; y < 8; y++ { + y8 := y * 8 + yStride := y * stride + for x := 0; x < 8; x++ { + c := b[y8+x] + if c < -128 { + c = 0 + } else if c > 127 { + c = 255 + } else { + c += 128 + } + dst[yStride+x] = uint8(c) + } + } + } // for j + } // for i + mcu++ + if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy { + // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input, + // but this one assumes well-formed input, and hence the restart marker follows immediately. + _, err := io.ReadFull(d.r, d.tmp[0:2]) + if err != nil { + return err + } + if d.tmp[0] != 0xff || d.tmp[1] != expectedRST { + return FormatError("bad RST marker") + } + expectedRST++ + if expectedRST == rst7Marker+1 { + expectedRST = rst0Marker + } + // Reset the Huffman decoder. + d.b = bits{} + // Reset the DC components, as per section F.2.1.3.1. + dc = [nColorComponent]int32{} + // Reset the progressive decoder state, as per section G.1.2.2. + d.eobRun = 0 + } + } // for mx + } // for my + + return nil +} + +// refine decodes a successive approximation refinement block, as specified in +// section G.1.2. +func (d *decoder) refine(b *block, h *huffman, zigStart, zigEnd, delta int32) error { + // Refining a DC component is trivial. + if zigStart == 0 { + if zigEnd != 0 { + panic("unreachable") + } + bit, err := d.decodeBit() + if err != nil { + return err + } + if bit { + b[0] |= delta + } + return nil + } + + // Refining AC components is more complicated; see sections G.1.2.2 and G.1.2.3. + zig := zigStart + if d.eobRun == 0 { + loop: + for ; zig <= zigEnd; zig++ { + z := int32(0) + value, err := d.decodeHuffman(h) + if err != nil { + return err + } + val0 := value >> 4 + val1 := value & 0x0f + + switch val1 { + case 0: + if val0 != 0x0f { + d.eobRun = uint16(1 << val0) + if val0 != 0 { + bits, err := d.decodeBits(int(val0)) + if err != nil { + return err + } + d.eobRun |= uint16(bits) + } + break loop + } + case 1: + z = delta + bit, err := d.decodeBit() + if err != nil { + return err + } + if !bit { + z = -z + } + default: + return FormatError("unexpected Huffman code") + } + + zig, err = d.refineNonZeroes(b, zig, zigEnd, int32(val0), delta) + if err != nil { + return err + } + if zig > zigEnd { + return FormatError("too many coefficients") + } + if z != 0 { + b[unzig[zig]] = z + } + } + } + if d.eobRun > 0 { + d.eobRun-- + if _, err := d.refineNonZeroes(b, zig, zigEnd, -1, delta); err != nil { + return err + } + } + return nil +} + +// refineNonZeroes refines non-zero entries of b in zig-zag order. If nz >= 0, +// the first nz zero entries are skipped over. +func (d *decoder) refineNonZeroes(b *block, zig, zigEnd, nz, delta int32) (int32, error) { + for ; zig <= zigEnd; zig++ { + u := unzig[zig] + if b[u] == 0 { + if nz == 0 { + break + } + nz-- + continue + } + bit, err := d.decodeBit() + if err != nil { + return 0, err + } + if !bit { + continue + } + if b[u] >= 0 { + b[u] += delta + } else { + b[u] -= delta + } + } + return zig, nil +} diff --git a/src/pkg/image/jpeg/writer.go b/src/pkg/image/jpeg/writer.go index 3322c09fe..c58fbf305 100644 --- a/src/pkg/image/jpeg/writer.go +++ b/src/pkg/image/jpeg/writer.go @@ -21,7 +21,7 @@ func min(x, y int) int { } // div returns a/b rounded to the nearest integer, instead of rounded to zero. -func div(a int, b int) int { +func div(a, b int32) int32 { if a >= 0 { return (a + (b >> 1)) / b } @@ -56,26 +56,28 @@ const ( nQuantIndex ) -// unscaledQuant are the unscaled quantization tables. Each encoder copies and -// scales the tables according to its quality parameter. +// unscaledQuant are the unscaled quantization tables in zig-zag order. Each +// encoder copies and scales the tables according to its quality parameter. +// The values are derived from section K.1 after converting from natural to +// zig-zag order. var unscaledQuant = [nQuantIndex][blockSize]byte{ // Luminance. { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 56, 68, 109, 103, 77, - 24, 35, 55, 64, 81, 104, 113, 92, - 49, 64, 78, 87, 103, 121, 120, 101, - 72, 92, 95, 98, 112, 100, 103, 99, + 16, 11, 12, 14, 12, 10, 16, 14, + 13, 14, 18, 17, 16, 19, 24, 40, + 26, 24, 22, 22, 24, 49, 35, 37, + 29, 40, 58, 51, 61, 60, 57, 51, + 56, 55, 64, 72, 92, 78, 64, 68, + 87, 69, 55, 56, 80, 109, 81, 87, + 95, 98, 103, 104, 103, 62, 77, 113, + 121, 112, 100, 120, 92, 101, 103, 99, }, // Chrominance. { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, + 17, 18, 18, 24, 21, 24, 47, 26, + 26, 47, 99, 66, 56, 66, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, @@ -208,8 +210,8 @@ func init() { // writer is a buffered writer. type writer interface { Flush() error - Write([]byte) (int, error) - WriteByte(byte) error + io.Writer + io.ByteWriter } // encoder encodes an image to the JPEG format. @@ -222,7 +224,7 @@ type encoder struct { buf [16]byte // bits and nBits are accumulated bits to write to w. bits, nBits uint32 - // quant is the scaled quantization tables. + // quant is the scaled quantization tables, in zig-zag order. quant [nQuantIndex][blockSize]byte } @@ -266,14 +268,14 @@ func (e *encoder) emit(bits, nBits uint32) { } // emitHuff emits the given value with the given Huffman encoder. -func (e *encoder) emitHuff(h huffIndex, value int) { +func (e *encoder) emitHuff(h huffIndex, value int32) { x := theHuffmanLUT[h][value] e.emit(x&(1<<24-1), x>>24) } // emitHuffRLE emits a run of runLength copies of value encoded with the given // Huffman encoder. -func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) { +func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int32) { a, b := value, value if a < 0 { a, b = -value, value-1 @@ -284,7 +286,7 @@ func (e *encoder) emitHuffRLE(h huffIndex, runLength, value int) { } else { nBits = 8 + uint32(bitCount[a>>8]) } - e.emitHuff(h, runLength<<4|int(nBits)) + e.emitHuff(h, runLength<<4|int32(nBits)) if nBits > 0 { e.emit(uint32(b)&(1<<nBits-1), nBits) } @@ -301,7 +303,7 @@ func (e *encoder) writeMarkerHeader(marker uint8, markerlen int) { // writeDQT writes the Define Quantization Table marker. func (e *encoder) writeDQT() { - markerlen := 2 + int(nQuantIndex)*(1+blockSize) + const markerlen = 2 + int(nQuantIndex)*(1+blockSize) e.writeMarkerHeader(dqtMarker, markerlen) for i := range e.quant { e.writeByte(uint8(i)) @@ -311,7 +313,7 @@ func (e *encoder) writeDQT() { // writeSOF0 writes the Start Of Frame (Baseline) marker. func (e *encoder) writeSOF0(size image.Point) { - markerlen := 8 + 3*nColorComponent + const markerlen = 8 + 3*nColorComponent e.writeMarkerHeader(sof0Marker, markerlen) e.buf[0] = 8 // 8-bit color. e.buf[1] = uint8(size.Y >> 8) @@ -344,15 +346,16 @@ func (e *encoder) writeDHT() { // writeBlock writes a block of pixel data using the given quantization table, // returning the post-quantized DC value of the DCT-transformed block. -func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int) int { +// b is in natural (not zig-zag) order. +func (e *encoder) writeBlock(b *block, q quantIndex, prevDC int32) int32 { fdct(b) // Emit the DC delta. - dc := div(b[0], (8 * int(e.quant[q][0]))) + dc := div(b[0], 8*int32(e.quant[q][0])) e.emitHuffRLE(huffIndex(2*q+0), 0, dc-prevDC) // Emit the AC components. - h, runLength := huffIndex(2*q+1), 0 - for k := 1; k < blockSize; k++ { - ac := div(b[unzig[k]], (8 * int(e.quant[q][k]))) + h, runLength := huffIndex(2*q+1), int32(0) + for zig := 1; zig < blockSize; zig++ { + ac := div(b[unzig[zig]], 8*int32(e.quant[q][zig])) if ac == 0 { runLength++ } else { @@ -380,9 +383,9 @@ func toYCbCr(m image.Image, p image.Point, yBlock, cbBlock, crBlock *block) { for i := 0; i < 8; i++ { r, g, b, _ := m.At(min(p.X+i, xmax), min(p.Y+j, ymax)).RGBA() yy, cb, cr := color.RGBToYCbCr(uint8(r>>8), uint8(g>>8), uint8(b>>8)) - yBlock[8*j+i] = int(yy) - cbBlock[8*j+i] = int(cb) - crBlock[8*j+i] = int(cr) + yBlock[8*j+i] = int32(yy) + cbBlock[8*j+i] = int32(cb) + crBlock[8*j+i] = int32(cr) } } } @@ -405,9 +408,9 @@ func rgbaToYCbCr(m *image.RGBA, p image.Point, yBlock, cbBlock, crBlock *block) } pix := m.Pix[offset+sx*4:] yy, cb, cr := color.RGBToYCbCr(pix[0], pix[1], pix[2]) - yBlock[8*j+i] = int(yy) - cbBlock[8*j+i] = int(cb) - crBlock[8*j+i] = int(cr) + yBlock[8*j+i] = int32(yy) + cbBlock[8*j+i] = int32(cb) + crBlock[8*j+i] = int32(cr) } } } @@ -433,10 +436,12 @@ func scale(dst *block, src *[4]block) { // - component 1 uses DC table 0 and AC table 0 "\x01\x00", // - component 2 uses DC table 1 and AC table 1 "\x02\x11", // - component 3 uses DC table 1 and AC table 1 "\x03\x11", -// - padding "\x00\x00\x00". +// - the bytes "\x00\x3f\x00". Section B.2.3 of the spec says that for +// sequential DCTs, those bytes (8-bit Ss, 8-bit Se, 4-bit Ah, 4-bit Al) +// should be 0x00, 0x3f, 0x00<<4 | 0x00. var sosHeader = []byte{ 0xff, 0xda, 0x00, 0x0c, 0x03, 0x01, 0x00, 0x02, - 0x11, 0x03, 0x11, 0x00, 0x00, 0x00, + 0x11, 0x03, 0x11, 0x00, 0x3f, 0x00, } // writeSOS writes the StartOfScan marker. @@ -444,12 +449,11 @@ func (e *encoder) writeSOS(m image.Image) { e.write(sosHeader) var ( // Scratch buffers to hold the YCbCr values. - yBlock block - cbBlock [4]block - crBlock [4]block - cBlock block + // The blocks are in natural (not zig-zag) order. + b block + cb, cr [4]block // DC components are delta-encoded. - prevDCY, prevDCCb, prevDCCr int + prevDCY, prevDCCb, prevDCCr int32 ) bounds := m.Bounds() rgba, _ := m.(*image.RGBA) @@ -460,16 +464,16 @@ func (e *encoder) writeSOS(m image.Image) { yOff := (i & 2) * 4 p := image.Pt(x+xOff, y+yOff) if rgba != nil { - rgbaToYCbCr(rgba, p, &yBlock, &cbBlock[i], &crBlock[i]) + rgbaToYCbCr(rgba, p, &b, &cb[i], &cr[i]) } else { - toYCbCr(m, p, &yBlock, &cbBlock[i], &crBlock[i]) + toYCbCr(m, p, &b, &cb[i], &cr[i]) } - prevDCY = e.writeBlock(&yBlock, 0, prevDCY) + prevDCY = e.writeBlock(&b, 0, prevDCY) } - scale(&cBlock, &cbBlock) - prevDCCb = e.writeBlock(&cBlock, 1, prevDCCb) - scale(&cBlock, &crBlock) - prevDCCr = e.writeBlock(&cBlock, 1, prevDCCr) + scale(&b, &cb) + prevDCCb = e.writeBlock(&b, 1, prevDCCb) + scale(&b, &cr) + prevDCCr = e.writeBlock(&b, 1, prevDCCr) } } // Pad the last byte with 1's. diff --git a/src/pkg/image/jpeg/writer_test.go b/src/pkg/image/jpeg/writer_test.go index b8e8fa34e..0b2143f5b 100644 --- a/src/pkg/image/jpeg/writer_test.go +++ b/src/pkg/image/jpeg/writer_test.go @@ -6,6 +6,7 @@ package jpeg import ( "bytes" + "fmt" "image" "image/color" "image/png" @@ -15,6 +16,87 @@ import ( "testing" ) +// zigzag maps from the natural ordering to the zig-zag ordering. For example, +// zigzag[0*8 + 3] is the zig-zag sequence number of the element in the fourth +// column and first row. +var zigzag = [blockSize]int{ + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63, +} + +func TestZigUnzig(t *testing.T) { + for i := 0; i < blockSize; i++ { + if unzig[zigzag[i]] != i { + t.Errorf("unzig[zigzag[%d]] == %d", i, unzig[zigzag[i]]) + } + if zigzag[unzig[i]] != i { + t.Errorf("zigzag[unzig[%d]] == %d", i, zigzag[unzig[i]]) + } + } +} + +// unscaledQuantInNaturalOrder are the unscaled quantization tables in +// natural (not zig-zag) order, as specified in section K.1. +var unscaledQuantInNaturalOrder = [nQuantIndex][blockSize]byte{ + // Luminance. + { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99, + }, + // Chrominance. + { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + }, +} + +func TestUnscaledQuant(t *testing.T) { + bad := false + for i := quantIndex(0); i < nQuantIndex; i++ { + for zig := 0; zig < blockSize; zig++ { + got := unscaledQuant[i][zig] + want := unscaledQuantInNaturalOrder[i][unzig[zig]] + if got != want { + t.Errorf("i=%d, zig=%d: got %d, want %d", i, zig, got, want) + bad = true + } + } + } + if bad { + names := [nQuantIndex]string{"Luminance", "Chrominance"} + buf := &bytes.Buffer{} + for i, name := range names { + fmt.Fprintf(buf, "// %s.\n{\n", name) + for zig := 0; zig < blockSize; zig++ { + fmt.Fprintf(buf, "%d, ", unscaledQuantInNaturalOrder[i][unzig[zig]]) + if zig%8 == 7 { + buf.WriteString("\n") + } + } + buf.WriteString("},\n") + } + t.Logf("expected unscaledQuant values:\n%s", buf.String()) + } +} + var testCase = []struct { filename string quality int @@ -89,24 +171,21 @@ func TestWriter(t *testing.T) { } } -func BenchmarkEncodeRGBOpaque(b *testing.B) { +func BenchmarkEncode(b *testing.B) { b.StopTimer() img := image.NewRGBA(image.Rect(0, 0, 640, 480)) - // Set all pixels to 0xFF alpha to force opaque mode. bo := img.Bounds() rnd := rand.New(rand.NewSource(123)) for y := bo.Min.Y; y < bo.Max.Y; y++ { for x := bo.Min.X; x < bo.Max.X; x++ { - img.Set(x, y, color.RGBA{ + img.SetRGBA(x, y, color.RGBA{ uint8(rnd.Intn(256)), uint8(rnd.Intn(256)), uint8(rnd.Intn(256)), - 255}) + 255, + }) } } - if !img.Opaque() { - b.Fatal("expected image to be opaque") - } b.SetBytes(640 * 480 * 4) b.StartTimer() options := &Options{Quality: 90} |