diff options
author | Rob Pike <r@golang.org> | 2010-06-15 17:41:11 -0700 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2010-06-15 17:41:11 -0700 |
commit | 7a83d6000e6eb6dff5426b6b55ab9533d8880be0 (patch) | |
tree | 85b0c3e27363d2547424a541a7be2805245ee3e6 /src/pkg/fmt/scan.go | |
parent | 67e02339172a0abac5f3e3550ac48b3703f1b73f (diff) | |
download | golang-7a83d6000e6eb6dff5426b6b55ab9533d8880be0.tar.gz |
fmt.Scan: fix %c in the case where the input does not implement ReadRune itself.
While we're at it, clean up and test the code to guarantee we see every byte when
the text is erroneous UTF-8.
Fixes issue 866.
R=rsc
CC=golang-dev
http://codereview.appspot.com/1712042
Diffstat (limited to 'src/pkg/fmt/scan.go')
-rw-r--r-- | src/pkg/fmt/scan.go | 53 |
1 files changed, 38 insertions, 15 deletions
diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go index 92990b421..883a95d34 100644 --- a/src/pkg/fmt/scan.go +++ b/src/pkg/fmt/scan.go @@ -220,14 +220,35 @@ func (s *ss) Token() (tok string, err os.Error) { // from an io.Reader. It is used if the Reader given to the scanner does // not already implement ReadRuner. type readRune struct { - reader io.Reader - buf [utf8.UTFMax]byte + reader io.Reader + buf [utf8.UTFMax]byte // used only inside ReadRune + pending int // number of bytes in pendBuf; only >0 for bad UTF-8 + pendBuf [utf8.UTFMax]byte // bytes left over +} + +// readByte returns the next byte from the input, which may be +// left over from a previous read if the UTF-8 was ill-formed. +func (r *readRune) readByte() (b byte, err os.Error) { + if r.pending > 0 { + b = r.pendBuf[0] + copy(r.pendBuf[0:], r.pendBuf[1:]) + r.pending-- + return + } + _, err = r.reader.Read(r.pendBuf[0:1]) + return r.pendBuf[0], err +} + +// unread saves the bytes for the next read. +func (r *readRune) unread(buf []byte) { + copy(r.pendBuf[r.pending:], buf) + r.pending += len(buf) } // ReadRune returns the next UTF-8 encoded code point from the // io.Reader inside r. -func (r readRune) ReadRune() (rune int, size int, err os.Error) { - _, err = r.reader.Read(r.buf[0:1]) +func (r *readRune) ReadRune() (rune int, size int, err os.Error) { + r.buf[0], err = r.readByte() if err != nil { return 0, 0, err } @@ -235,20 +256,22 @@ func (r readRune) ReadRune() (rune int, size int, err os.Error) { rune = int(r.buf[0]) return } - for size := 1; size < utf8.UTFMax; size++ { - _, err = r.reader.Read(r.buf[size : size+1]) + var n int + for n = 1; !utf8.FullRune(r.buf[0:n]); n++ { + r.buf[n], err = r.readByte() if err != nil { - break - } - if !utf8.FullRune(r.buf[0:]) { - continue - } - if c, w := utf8.DecodeRune(r.buf[0:size]); w == size { - rune = c + if err == os.EOF { + err = nil + break + } return } } - return utf8.RuneError, 1, err + rune, size = utf8.DecodeRune(r.buf[0:n]) + if size < n { // an error + r.unread(r.buf[size:n]) + } + return } @@ -264,7 +287,7 @@ func newScanState(r io.Reader, nlIsSpace bool) *ss { if rr, ok := r.(readRuner); ok { s.rr = rr } else { - s.rr = readRune{reader: r} + s.rr = &readRune{reader: r} } s.nlIsSpace = nlIsSpace s.peekRune = -1 |