Diffstat (limited to 'src/pkg/template/parse/lex.go')
-rw-r--r-- | src/pkg/template/parse/lex.go | 472 |
1 files changed, 472 insertions, 0 deletions
diff --git a/src/pkg/template/parse/lex.go b/src/pkg/template/parse/lex.go
new file mode 100644
index 000000000..83ad6c628
--- /dev/null
+++ b/src/pkg/template/parse/lex.go
@@ -0,0 +1,472 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package parse
+
+import (
+	"fmt"
+	"strings"
+	"unicode"
+	"utf8"
+)
+
+// item represents a token or text string returned from the scanner.
+type item struct {
+	typ itemType
+	val string
+}
+
+func (i item) String() string {
+	switch {
+	case i.typ == itemEOF:
+		return "EOF"
+	case i.typ == itemError:
+		return i.val
+	case i.typ > itemKeyword:
+		return fmt.Sprintf("<%s>", i.val)
+	case len(i.val) > 10:
+		return fmt.Sprintf("%.10q...", i.val)
+	}
+	return fmt.Sprintf("%q", i.val)
+}
+
+// itemType identifies the type of lex items.
+type itemType int
+
+const (
+	itemError itemType = iota // error occurred; value is text of error
+	itemBool                  // boolean constant
+	itemChar                  // printable ASCII character; grab bag for comma etc.
+	itemCharConstant          // character constant
+	itemComplex               // complex constant (1+2i); imaginary is just a number
+	itemColonEquals           // colon-equals (':=') introducing a declaration
+	itemEOF
+	itemField      // alphanumeric identifier, starting with '.', possibly chained ('.x.y')
+	itemIdentifier // alphanumeric identifier
+	itemLeftDelim  // left action delimiter
+	itemNumber     // simple number, including imaginary
+	itemPipe       // pipe symbol
+	itemRawString  // raw quoted string (includes quotes)
+	itemRightDelim // right action delimiter
+	itemString     // quoted string (includes quotes)
+	itemText       // plain text
+	itemVariable   // variable starting with '$', such as '$' or '$1' or '$hello'.
+	// Keywords appear after all the rest.
+	itemKeyword  // used only to delimit the keywords
+	itemDot      // the cursor, spelled '.'.
+	itemDefine   // define keyword
+	itemElse     // else keyword
+	itemEnd      // end keyword
+	itemIf       // if keyword
+	itemRange    // range keyword
+	itemTemplate // template keyword
+	itemWith     // with keyword
+)
+
+// Make the types prettyprint.
+var itemName = map[itemType]string{
+	itemError:        "error",
+	itemBool:         "bool",
+	itemChar:         "char",
+	itemCharConstant: "charconst",
+	itemComplex:      "complex",
+	itemColonEquals:  ":=",
+	itemEOF:          "EOF",
+	itemField:        "field",
+	itemIdentifier:   "identifier",
+	itemLeftDelim:    "left delim",
+	itemNumber:       "number",
+	itemPipe:         "pipe",
+	itemRawString:    "raw string",
+	itemRightDelim:   "right delim",
+	itemString:       "string",
+	itemVariable:     "variable",
+	// keywords
+	itemDot:      ".",
+	itemDefine:   "define",
+	itemElse:     "else",
+	itemIf:       "if",
+	itemEnd:      "end",
+	itemRange:    "range",
+	itemTemplate: "template",
+	itemWith:     "with",
+}
+
+func (i itemType) String() string {
+	s := itemName[i]
+	if s == "" {
+		return fmt.Sprintf("item%d", int(i))
+	}
+	return s
+}
+
+var key = map[string]itemType{
+	".":        itemDot,
+	"define":   itemDefine,
+	"else":     itemElse,
+	"end":      itemEnd,
+	"if":       itemIf,
+	"range":    itemRange,
+	"template": itemTemplate,
+	"with":     itemWith,
+}
+
+const eof = -1
+
+// stateFn represents the state of the scanner as a function that returns the next state.
+type stateFn func(*lexer) stateFn
+
+// lexer holds the state of the scanner.
+type lexer struct {
+	name  string    // the name of the input; used only for error reports.
+	input string    // the string being scanned.
+	state stateFn   // the next lexing function to enter
+	pos   int       // current position in the input.
+	start int       // start position of this item.
+	width int       // width of last rune read from input.
+	items chan item // channel of scanned items.
+}
+
+// next returns the next rune in the input.
+func (l *lexer) next() (rune int) {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+	l.pos += l.width
+	return rune
+}
+
+// peek returns but does not consume the next rune in the input.
+func (l *lexer) peek() int {
+	rune := l.next()
+	l.backup()
+	return rune
+}
+
+// backup steps back one rune. Can only be called once per call of next.
+func (l *lexer) backup() {
+	l.pos -= l.width
+}
+
+// emit passes an item back to the client.
+func (l *lexer) emit(t itemType) {
+	l.items <- item{t, l.input[l.start:l.pos]}
+	l.start = l.pos
+}
+
+// ignore skips over the pending input before this point.
+func (l *lexer) ignore() {
+	l.start = l.pos
+}
+
+// accept consumes the next rune if it's from the valid set.
+func (l *lexer) accept(valid string) bool {
+	if strings.IndexRune(valid, l.next()) >= 0 {
+		return true
+	}
+	l.backup()
+	return false
+}
+
+// acceptRun consumes a run of runes from the valid set.
+func (l *lexer) acceptRun(valid string) {
+	for strings.IndexRune(valid, l.next()) >= 0 {
+	}
+	l.backup()
+}
+
+// lineNumber reports which line we're on. Doing it this way
+// means we don't have to worry about peek double counting.
+func (l *lexer) lineNumber() int {
+	return 1 + strings.Count(l.input[:l.pos], "\n")
+}
+
+// error returns an error token and terminates the scan by passing
+// back a nil pointer that will be the next state, terminating l.run.
+func (l *lexer) errorf(format string, args ...interface{}) stateFn {
+	l.items <- item{itemError, fmt.Sprintf(format, args...)}
+	return nil
+}
+
+// nextItem returns the next item from the input.
+func (l *lexer) nextItem() item {
+	for {
+		select {
+		case item := <-l.items:
+			return item
+		default:
+			l.state = l.state(l)
+		}
+	}
+	panic("not reached")
+}
+
+// lex creates a new scanner for the input string.
+func lex(name, input string) *lexer {
+	l := &lexer{
+		name:  name,
+		input: input,
+		state: lexText,
+		items: make(chan item, 2), // Two items of buffering is sufficient for all state functions
+	}
+	return l
+}
+
+// state functions
+
+const (
+	leftDelim    = "{{"
+	rightDelim   = "}}"
+	leftComment  = "{{/*"
+	rightComment = "*/}}"
+)
+
+// lexText scans until an opening action delimiter, "{{".
+func lexText(l *lexer) stateFn {
+	for {
+		if strings.HasPrefix(l.input[l.pos:], leftDelim) {
+			if l.pos > l.start {
+				l.emit(itemText)
+			}
+			return lexLeftDelim
+		}
+		if l.next() == eof {
+			break
+		}
+	}
+	// Correctly reached EOF.
+	if l.pos > l.start {
+		l.emit(itemText)
+	}
+	l.emit(itemEOF)
+	return nil
+}
+
+// lexLeftDelim scans the left delimiter, which is known to be present.
+func lexLeftDelim(l *lexer) stateFn {
+	if strings.HasPrefix(l.input[l.pos:], leftComment) {
+		return lexComment
+	}
+	l.pos += len(leftDelim)
+	l.emit(itemLeftDelim)
+	return lexInsideAction
+}
+
+// lexComment scans a comment. The left comment marker is known to be present.
+func lexComment(l *lexer) stateFn {
+	i := strings.Index(l.input[l.pos:], rightComment)
+	if i < 0 {
+		return l.errorf("unclosed comment")
+	}
+	l.pos += i + len(rightComment)
+	l.ignore()
+	return lexText
+}
+
+// lexRightDelim scans the right delimiter, which is known to be present.
+func lexRightDelim(l *lexer) stateFn {
+	l.pos += len(rightDelim)
+	l.emit(itemRightDelim)
+	return lexText
+}
+
+// lexInsideAction scans the elements inside action delimiters.
+func lexInsideAction(l *lexer) stateFn {
+	// Either number, quoted string, or identifier.
+	// Spaces separate and are ignored.
+	// Pipe symbols separate and are emitted.
+	if strings.HasPrefix(l.input[l.pos:], rightDelim) {
+		return lexRightDelim
+	}
+	switch r := l.next(); {
+	case r == eof || r == '\n':
+		return l.errorf("unclosed action")
+	case isSpace(r):
+		l.ignore()
+	case r == ':':
+		if l.next() != '=' {
+			return l.errorf("expected :=")
+		}
+		l.emit(itemColonEquals)
+	case r == '|':
+		l.emit(itemPipe)
+	case r == '"':
+		return lexQuote
+	case r == '`':
+		return lexRawQuote
+	case r == '$':
+		return lexIdentifier
+	case r == '\'':
+		return lexChar
+	case r == '.':
+		// special look-ahead for ".field" so we don't break l.backup().
+		if l.pos < len(l.input) {
+			r := l.input[l.pos]
+			if r < '0' || '9' < r {
+				return lexIdentifier // itemDot comes from the keyword table.
+			}
+		}
+		fallthrough // '.' can start a number.
+	case r == '+' || r == '-' || ('0' <= r && r <= '9'):
+		l.backup()
+		return lexNumber
+	case isAlphaNumeric(r):
+		l.backup()
+		return lexIdentifier
+	case r <= unicode.MaxASCII && unicode.IsPrint(r):
+		l.emit(itemChar)
+		return lexInsideAction
+	default:
+		return l.errorf("unrecognized character in action: %#U", r)
+	}
+	return lexInsideAction
+}
+
+// lexIdentifier scans an alphanumeric or field.
+func lexIdentifier(l *lexer) stateFn {
+Loop:
+	for {
+		switch r := l.next(); {
+		case isAlphaNumeric(r):
+			// absorb.
+		case r == '.' && (l.input[l.start] == '.' || l.input[l.start] == '$'):
+			// field chaining; absorb into one token.
+		default:
+			l.backup()
+			word := l.input[l.start:l.pos]
+			switch {
+			case key[word] > itemKeyword:
+				l.emit(key[word])
+			case word[0] == '.':
+				l.emit(itemField)
+			case word[0] == '$':
+				l.emit(itemVariable)
+			case word == "true", word == "false":
+				l.emit(itemBool)
+			default:
+				l.emit(itemIdentifier)
+			}
+			break Loop
+		}
+	}
+	return lexInsideAction
+}
+
+// lexChar scans a character constant. The initial quote is already
+// scanned. Syntax checking is done by the parse.
+func lexChar(l *lexer) stateFn {
+Loop:
+	for {
+		switch l.next() {
+		case '\\':
+			if r := l.next(); r != eof && r != '\n' {
+				break
+			}
+			fallthrough
+		case eof, '\n':
+			return l.errorf("unterminated character constant")
+		case '\'':
+			break Loop
+		}
+	}
+	l.emit(itemCharConstant)
+	return lexInsideAction
+}
+
+// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
+// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
+// and "089" - but when it's wrong the input is invalid and the parser (via
+// strconv) will notice.
+func lexNumber(l *lexer) stateFn {
+	if !l.scanNumber() {
+		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
+	}
+	if sign := l.peek(); sign == '+' || sign == '-' {
+		// Complex: 1+2i. No spaces, must end in 'i'.
+		if !l.scanNumber() || l.input[l.pos-1] != 'i' {
+			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
+		}
+		l.emit(itemComplex)
+	} else {
+		l.emit(itemNumber)
+	}
+	return lexInsideAction
+}
+
+func (l *lexer) scanNumber() bool {
+	// Optional leading sign.
+	l.accept("+-")
+	// Is it hex?
+	digits := "0123456789"
+	if l.accept("0") && l.accept("xX") {
+		digits = "0123456789abcdefABCDEF"
+	}
+	l.acceptRun(digits)
+	if l.accept(".") {
+		l.acceptRun(digits)
+	}
+	if l.accept("eE") {
+		l.accept("+-")
+		l.acceptRun("0123456789")
+	}
+	// Is it imaginary?
+	l.accept("i")
+	// Next thing mustn't be alphanumeric.
+	if isAlphaNumeric(l.peek()) {
+		l.next()
+		return false
+	}
+	return true
+}
+
+// lexQuote scans a quoted string.
+func lexQuote(l *lexer) stateFn {
+Loop:
+	for {
+		switch l.next() {
+		case '\\':
+			if r := l.next(); r != eof && r != '\n' {
+				break
+			}
+			fallthrough
+		case eof, '\n':
+			return l.errorf("unterminated quoted string")
+		case '"':
+			break Loop
+		}
+	}
+	l.emit(itemString)
+	return lexInsideAction
+}
+
+// lexRawQuote scans a raw quoted string.
+func lexRawQuote(l *lexer) stateFn {
+Loop:
+	for {
+		switch l.next() {
+		case eof, '\n':
+			return l.errorf("unterminated raw quoted string")
+		case '`':
+			break Loop
+		}
+	}
+	l.emit(itemRawString)
+	return lexInsideAction
+}
+
+// isSpace reports whether r is a space character.
+func isSpace(r int) bool {
+	switch r {
+	case ' ', '\t', '\n', '\r':
+		return true
+	}
+	return false
+}
+
+// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
+func isAlphaNumeric(r int) bool {
+	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
+}
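The parser side of the package is not part of this commit, so the following is only a rough sketch of how a caller inside package parse might consume this lexer; the file name and the helper printItems are hypothetical, and the code is written in the same pre-Go 1 dialect as the diff above (runes as int, top-level utf8 package).

// lexdemo.go (hypothetical, not part of the commit): drive the lexer by
// pulling items until it reports EOF or an error item.
package parse

import "fmt"

// printItems lexes input under the given name and prints each scanned item.
func printItems(name, input string) {
	l := lex(name, input)
	for {
		i := l.nextItem()
		fmt.Println(i) // item.String elides long text and wraps keywords in <...>
		if i.typ == itemEOF || i.typ == itemError {
			return
		}
	}
}

Note the design this relies on: nextItem alternates between draining the two-item buffered channel and running the next state function in the same goroutine, so lexing proceeds lazily, one state step at a time, as the caller asks for tokens; no separate lexing goroutine is needed.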