diff options
Diffstat (limited to 'src/pkg/fmt/scan.go')
-rw-r--r-- | src/pkg/fmt/scan.go | 121 |
1 files changed, 84 insertions, 37 deletions
diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go index a408c42aa..53d88d574 100644 --- a/src/pkg/fmt/scan.go +++ b/src/pkg/fmt/scan.go @@ -7,6 +7,7 @@ package fmt import ( "bytes" "io" + "math" "os" "reflect" "strconv" @@ -15,18 +16,11 @@ import ( "utf8" ) -// readRuner is the interface to something that can read runes. If -// the object provided to Scan does not satisfy this interface, the -// object will be wrapped by a readRune object. -type readRuner interface { - ReadRune() (rune int, size int, err os.Error) -} - -// unreadRuner is the interface to something that can unread runes. +// runeUnreader is the interface to something that can unread runes. // If the object provided to Scan does not satisfy this interface, // a local buffer will be used to back up the input, but its contents // will be lost when Scan returns. -type unreadRuner interface { +type runeUnreader interface { UnreadRune() os.Error } @@ -36,7 +30,7 @@ type unreadRuner interface { type ScanState interface { // GetRune reads the next rune (Unicode code point) from the input. GetRune() (rune int, err os.Error) - // UngetRune causes the next call to GetRune to return the rune. + // UngetRune causes the next call to GetRune to return the same rune. UngetRune() // Width returns the value of the width option and whether it has been set. // The unit is Unicode code points. @@ -137,15 +131,15 @@ const EOF = -1 // ss is the internal implementation of ScanState. type ss struct { - rr readRuner // where to read input - buf bytes.Buffer // token accumulator - nlIsSpace bool // whether newline counts as white space - peekRune int // one-rune lookahead - prevRune int // last rune returned by GetRune - atEOF bool // already read EOF - maxWid int // max width of field, in runes - widPresent bool // width was specified - wid int // width consumed so far; used in accept() + rr io.RuneReader // where to read input + buf bytes.Buffer // token accumulator + nlIsSpace bool // whether newline counts as white space + peekRune int // one-rune lookahead + prevRune int // last rune returned by GetRune + atEOF bool // already read EOF + maxWid int // max width of field, in runes + widPresent bool // width was specified + wid int // width consumed so far; used in accept() } func (s *ss) GetRune() (rune int, err os.Error) { @@ -215,7 +209,7 @@ func (s *ss) mustGetRune() (rune int) { func (s *ss) UngetRune() { - if u, ok := s.rr.(unreadRuner); ok { + if u, ok := s.rr.(runeUnreader); ok { u.UnreadRune() } else { s.peekRune = s.prevRune @@ -246,7 +240,7 @@ func (s *ss) Token() (tok string, err os.Error) { // readRune is a structure to enable reading UTF-8 encoded code points // from an io.Reader. It is used if the Reader given to the scanner does -// not already implement ReadRuner. +// not already implement io.RuneReader. type readRune struct { reader io.Reader buf [utf8.UTFMax]byte // used only inside ReadRune @@ -308,7 +302,7 @@ var ssFree = newCache(func() interface{} { return new(ss) }) // Allocate a new ss struct or grab a cached one. func newScanState(r io.Reader, nlIsSpace bool) *ss { s := ssFree.get().(*ss) - if rr, ok := r.(readRuner); ok { + if rr, ok := r.(io.RuneReader); ok { s.rr = rr } else { s.rr = &readRune{reader: r} @@ -394,14 +388,12 @@ func (s *ss) consume(ok string, accept bool) bool { if rune == EOF { return false } - for i := 0; i < len(ok); i++ { - if int(ok[i]) == rune { - if accept { - s.buf.WriteRune(rune) - s.wid++ - } - return true + if strings.IndexRune(ok, rune) >= 0 { + if accept { + s.buf.WriteRune(rune) + s.wid++ } + return true } if rune != EOF && accept { s.UngetRune() @@ -409,6 +401,15 @@ func (s *ss) consume(ok string, accept bool) bool { return false } +// peek reports whether the next character is in the ok string, without consuming it. +func (s *ss) peek(ok string) bool { + rune := s.getRune() + if rune != EOF { + s.UngetRune() + } + return strings.IndexRune(ok, rune) >= 0 +} + // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the // buffer and returns true. Otherwise it return false. func (s *ss) accept(ok string) bool { @@ -459,7 +460,7 @@ const ( hexadecimalDigits = "0123456789aAbBcCdDeEfF" sign = "+-" period = "." - exponent = "eE" + exponent = "eEp" ) // getBase returns the numeric base represented by the verb and its digit string. @@ -482,8 +483,8 @@ func (s *ss) getBase(verb int) (base int, digits string) { } // scanNumber returns the numerical string with specified digits starting here. -func (s *ss) scanNumber(digits string) string { - if !s.accept(digits) { +func (s *ss) scanNumber(digits string, haveDigits bool) string { + if !haveDigits && !s.accept(digits) { s.errorString("expected integer") } for s.accept(digits) { @@ -502,22 +503,44 @@ func (s *ss) scanRune(bitSize int) int64 { return rune } +// scanBasePrefix reports whether the integer begins with a 0 or 0x, +// and returns the base, digit string, and whether a zero was found. +// It is called only if the verb is %v. +func (s *ss) scanBasePrefix() (base int, digits string, found bool) { + if !s.peek("0") { + return 10, decimalDigits, false + } + s.accept("0") + found = true // We've put a digit into the token buffer. + // Special cases for '0' && '0x' + base, digits = 8, octalDigits + if s.peek("xX") { + s.consume("xX", false) + base, digits = 16, hexadecimalDigits + } + return +} + // scanInt returns the value of the integer represented by the next // token, checking for overflow. Any error is stored in s.err. func (s *ss) scanInt(verb int, bitSize int) int64 { if verb == 'c' { return s.scanRune(bitSize) } - base, digits := s.getBase(verb) s.skipSpace(false) + base, digits := s.getBase(verb) + haveDigits := false if verb == 'U' { if !s.consume("U", false) || !s.consume("+", false) { s.errorString("bad unicode format ") } } else { s.accept(sign) // If there's a sign, it will be left in the token buffer. + if verb == 'v' { + base, digits, haveDigits = s.scanBasePrefix() + } } - tok := s.scanNumber(digits) + tok := s.scanNumber(digits, haveDigits) i, err := strconv.Btoi64(tok, base) if err != nil { s.error(err) @@ -536,14 +559,17 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 { if verb == 'c' { return uint64(s.scanRune(bitSize)) } - base, digits := s.getBase(verb) s.skipSpace(false) + base, digits := s.getBase(verb) + haveDigits := false if verb == 'U' { if !s.consume("U", false) || !s.consume("+", false) { s.errorString("bad unicode format ") } + } else if verb == 'v' { + base, digits, haveDigits = s.scanBasePrefix() } - tok := s.scanNumber(digits) + tok := s.scanNumber(digits, haveDigits) i, err := strconv.Btoui64(tok, base) if err != nil { s.error(err) @@ -617,6 +643,27 @@ func (s *ss) complexTokens() (real, imag string) { // convertFloat converts the string to a float64value. func (s *ss) convertFloat(str string, n int) float64 { + if p := strings.Index(str, "p"); p >= 0 { + // Atof doesn't handle power-of-2 exponents, + // but they're easy to evaluate. + f, err := strconv.AtofN(str[:p], n) + if err != nil { + // Put full string into error. + if e, ok := err.(*strconv.NumError); ok { + e.Num = str + } + s.error(err) + } + n, err := strconv.Atoi(str[p+1:]) + if err != nil { + // Put full string into error. + if e, ok := err.(*strconv.NumError); ok { + e.Num = str + } + s.error(err) + } + return math.Ldexp(f, n) + } f, err := strconv.AtofN(str, n) if err != nil { s.error(err) @@ -747,7 +794,7 @@ func (s *ss) hexString() string { return s.buf.String() } -const floatVerbs = "eEfFgGv" +const floatVerbs = "beEfFgGv" // scanOne scans a single value, deriving the scanner from the type of the argument. func (s *ss) scanOne(verb int, field interface{}) { |