diff options
Diffstat (limited to 'src/pkg/fmt/scan.go')
-rw-r--r-- | src/pkg/fmt/scan.go | 413 |
1 files changed, 413 insertions, 0 deletions
diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go new file mode 100644 index 000000000..42469b90e --- /dev/null +++ b/src/pkg/fmt/scan.go @@ -0,0 +1,413 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package fmt + +import ( + "bytes" + "io" + "os" + "reflect" + "strconv" + "unicode" + "utf8" +) + +// readRuner is the interface to something that can read runes. If +// the object provided to Scan does not satisfy this interface, the +// object will be wrapped by a readRune object. +type readRuner interface { + ReadRune() (rune int, size int, err os.Error) +} + +type ss struct { + rr readRuner // where to read input + buf bytes.Buffer // token accumulator + nlIsSpace bool // whether newline counts as white space + peekRune int // one-rune lookahead + err os.Error +} + +// readRune is a structure to enable reading UTF-8 encoded code points +// from an io.Reader. It is used if the Reader given to the scanner does +// not already implement readRuner. +// TODO: readByteRune for things that can read bytes. +type readRune struct { + reader io.Reader + buf [utf8.UTFMax]byte +} + +// ReadRune returns the next UTF-8 encoded code point from the +// io.Reader inside r. +func (r readRune) ReadRune() (rune int, size int, err os.Error) { + _, err = r.reader.Read(r.buf[0:1]) + if err != nil { + return 0, 0, err + } + if r.buf[0] < utf8.RuneSelf { // fast check for common ASCII case + rune = int(r.buf[0]) + return + } + for size := 1; size < utf8.UTFMax; size++ { + _, err = r.reader.Read(r.buf[size : size+1]) + if err != nil { + break + } + if !utf8.FullRune(&r.buf) { + continue + } + if c, w := utf8.DecodeRune(r.buf[0:size]); w == size { + rune = c + return + } + } + return utf8.RuneError, 1, err +} + + +// A leaky bucket of reusable ss structures. +var ssFree = make(chan *ss, 100) + +// Allocate a new ss struct. Probably can grab the previous one from ssFree. +func newScanState(r io.Reader, nlIsSpace bool) *ss { + s, ok := <-ssFree + if !ok { + s = new(ss) + } + if rr, ok := r.(readRuner); ok { + s.rr = rr + } else { + s.rr = readRune{reader: r} + } + s.nlIsSpace = nlIsSpace + s.peekRune = -1 + s.err = nil + return s +} + +// Save used ss structs in ssFree; avoid an allocation per invocation. +func (s *ss) free() { + // Don't hold on to ss structs with large buffers. + if cap(s.buf.Bytes()) > 1024 { + return + } + s.buf.Reset() + s.rr = nil + _ = ssFree <- s +} + +// readRune reads the next rune, but checks the peeked item first. +func (s *ss) readRune() (rune int, err os.Error) { + if s.peekRune >= 0 { + rune = s.peekRune + s.peekRune = -1 + return + } + rune, _, err = s.rr.ReadRune() + return +} + +// token returns the next space-delimited string from the input. +// For Scanln, it stops at newlines. For Scan, newlines are treated as +// spaces. +func (s *ss) token() string { + s.buf.Reset() + // skip white space and maybe newline + for { + rune, err := s.readRune() + if err != nil { + s.err = err + return "" + } + if rune == '\n' { + if s.nlIsSpace { + continue + } + s.err = os.ErrorString("unexpected newline") + return "" + } + if !unicode.IsSpace(rune) { + s.buf.WriteRune(rune) + break + } + } + // read until white space or newline + for { + rune, err := s.readRune() + if err != nil { + if err == os.EOF { + break + } + s.err = err + return "" + } + if unicode.IsSpace(rune) { + s.peekRune = rune + break + } + s.buf.WriteRune(rune) + } + return s.buf.String() +} + +// Scan parses text read from r, storing successive space-separated +// values into successive arguments. Newlines count as space. Each +// argument must be a pointer to a basic type. It returns the number of +// items successfully parsed. If that is less than the number of arguments, +// err will report why. +func Scan(r io.Reader, a ...interface{}) (n int, err os.Error) { + s := newScanState(r, true) + n = s.doScan(a) + err = s.err + s.free() + return +} + +// Scanln parses text read from r, storing successive space-separated +// values into successive arguments. Scanning stops at a newline and after +// the final item there must be a newline or EOF. Each argument must be a +// pointer to a basic type. It returns the number of items successfully +// parsed. If that is less than the number of arguments, err will report +// why. +func Scanln(r io.Reader, a ...interface{}) (n int, err os.Error) { + s := newScanState(r, false) + n = s.doScan(a) + err = s.err + s.free() + return +} + +var intBits = uint(reflect.Typeof(int(0)).Size() * 8) +var uintptrBits = uint(reflect.Typeof(int(0)).Size() * 8) +var complexError = os.ErrorString("syntax error scanning complex number") + +// scanBool converts the token to a boolean value. +func (s *ss) scanBool(tok string) bool { + if s.err != nil { + return false + } + var b bool + b, s.err = strconv.Atob(tok) + return b +} + +// complexParts returns the strings representing the real and imaginary parts of the string. +func (s *ss) complexParts(str string) (real, imag string) { + if len(str) > 2 && str[0] == '(' && str[len(str)-1] == ')' { + str = str[1 : len(str)-1] + } + real, str = floatPart(str) + // Must now have a sign. + if len(str) == 0 || (str[0] != '+' && str[0] != '-') { + s.err = complexError + return "", "" + } + imag, str = floatPart(str) + if str != "i" { + s.err = complexError + return "", "" + } + return real, imag +} + +// floatPart returns strings holding the floating point value in the string, followed +// by the remainder of the string. That is, it splits str into (number,rest-of-string). +func floatPart(str string) (first, last string) { + i := 0 + // leading sign? + if len(str) > 0 && (str[0] == '+' || str[0] == '-') { + i++ + } + // digits? + for len(str) > 0 && '0' <= str[i] && str[i] <= '9' { + i++ + } + // period? + if str[i] == '.' { + i++ + } + // fraction? + for len(str) > 0 && '0' <= str[i] && str[i] <= '9' { + i++ + } + // exponent? + if len(str) > 0 && (str[i] == 'e' || str[i] == 'E') { + i++ + // leading sign? + if str[0] == '+' || str[0] == '-' { + i++ + } + // digits? + for len(str) > 0 && '0' <= str[i] && str[i] <= '9' { + i++ + } + } + return str[0:i], str[i:] +} + +// scanFloat converts the string to a float value. +func (s *ss) scanFloat(str string) float64 { + var f float + f, s.err = strconv.Atof(str) + return float64(f) +} + +// scanFloat32 converts the string to a float32 value. +func (s *ss) scanFloat32(str string) float64 { + var f float32 + f, s.err = strconv.Atof32(str) + return float64(f) +} + +// scanFloat64 converts the string to a float64 value. +func (s *ss) scanFloat64(str string) float64 { + var f float64 + f, s.err = strconv.Atof64(str) + return f +} + +// scanComplex converts the token to a complex128 value. +// The atof argument is a type-specific reader for the underlying type. +// If we're reading complex64, atof will parse float32s and convert them +// to float64's to avoid reproducing this code for each complex type. +func (s *ss) scanComplex(tok string, atof func(*ss, string) float64) complex128 { + if s.err != nil { + return 0 + } + sreal, simag := s.complexParts(tok) + if s.err != nil { + return 0 + } + var real, imag float64 + real = atof(s, sreal) + if s.err != nil { + return 0 + } + imag = atof(s, simag) + if s.err != nil { + return 0 + } + return cmplx(real, imag) +} + +// scanInt converts the token to an int64, but checks that it fits into the +// specified number of bits. +func (s *ss) scanInt(tok string, bitSize uint) int64 { + if s.err != nil { + return 0 + } + var i int64 + i, s.err = strconv.Atoi64(tok) + x := (i << (64 - bitSize)) >> (64 - bitSize) + if i != x { + s.err = os.ErrorString("integer overflow on token " + tok) + } + return i +} + +// scanUint converts the token to a uint64, but checks that it fits into the +// specified number of bits. +func (s *ss) scanUint(tok string, bitSize uint) uint64 { + if s.err != nil { + return 0 + } + var i uint64 + i, s.err = strconv.Atoui64(tok) + x := (i << (64 - bitSize)) >> (64 - bitSize) + if i != x { + s.err = os.ErrorString("unsigned integer overflow on token " + tok) + } + return i +} + +// doScan does the real work. At the moment, it handles only pointers to basic types. +func (s *ss) doScan(a []interface{}) int { + for n, param := range a { + tok := s.token() + switch v := param.(type) { + case *bool: + *v = s.scanBool(tok) + case *complex: + *v = complex(s.scanComplex(tok, (*ss).scanFloat)) + case *complex64: + *v = complex64(s.scanComplex(tok, (*ss).scanFloat32)) + case *complex128: + *v = s.scanComplex(tok, (*ss).scanFloat64) + case *int: + *v = int(s.scanInt(tok, intBits)) + case *int8: + *v = int8(s.scanInt(tok, 8)) + case *int16: + *v = int16(s.scanInt(tok, 16)) + case *int32: + *v = int32(s.scanInt(tok, 32)) + case *int64: + *v = s.scanInt(tok, 64) + case *uint: + *v = uint(s.scanUint(tok, intBits)) + case *uint8: + *v = uint8(s.scanUint(tok, 8)) + case *uint16: + *v = uint16(s.scanUint(tok, 16)) + case *uint32: + *v = uint32(s.scanUint(tok, 32)) + case *uint64: + *v = s.scanUint(tok, 64) + case *uintptr: + *v = uintptr(s.scanUint(tok, uintptrBits)) + case *float: + if s.err == nil { + *v, s.err = strconv.Atof(tok) + } else { + *v = 0 + } + case *float32: + if s.err == nil { + *v, s.err = strconv.Atof32(tok) + } else { + *v = 0 + } + case *float64: + if s.err == nil { + *v, s.err = strconv.Atof64(tok) + } else { + *v = 0 + } + case *string: + *v = tok + default: + t := reflect.Typeof(v) + str := t.String() + if _, ok := t.(*reflect.PtrType); !ok { + s.err = os.ErrorString("Scan: type not a pointer: " + str) + } else { + s.err = os.ErrorString("Scan: can't handle type: " + str) + } + } + if s.err != nil { + return n + } + } + // Check for newline if required. + if !s.nlIsSpace { + for { + rune, err := s.readRune() + if err != nil { + if err == os.EOF { + break + } + s.err = err + break + } + if rune == '\n' { + break + } + if !unicode.IsSpace(rune) { + s.err = os.ErrorString("Scan: expected newline") + break + } + } + } + return len(a) +} |