diff options
Diffstat (limited to 'src/pkg/json/parse.go')
| -rw-r--r-- | src/pkg/json/parse.go | 419 | 
1 files changed, 419 insertions, 0 deletions
| diff --git a/src/pkg/json/parse.go b/src/pkg/json/parse.go new file mode 100644 index 000000000..e33b9dbc1 --- /dev/null +++ b/src/pkg/json/parse.go @@ -0,0 +1,419 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// JSON (JavaScript Object Notation) parser. +// See http://www.json.org/ + +// The json package implements a simple parser and +// representation for JSON (JavaScript Object Notation), +// as defined at http://www.json.org/. +package json + +import ( +	"fmt"; +	"io"; +	"math"; +	"strconv"; +	"strings"; +	"utf8"; +) + +// Strings +// +//   Double quoted with escapes: \" \\ \/ \b \f \n \r \t \uXXXX. +//   No literal control characters, supposedly. +//   Have also seen \' and embedded newlines. + +func _UnHex(p string, r, l int) (v int, ok bool) { +	v = 0; +	for i := r; i < l; i++ { +		if i >= len(p) { +			return 0, false +		} +		v *= 16; +		switch { +		case '0' <= p[i] && p[i] <= '9': +			v += int(p[i] - '0'); +		case 'a' <= p[i] && p[i] <= 'f': +			v += int(p[i] - 'a' + 10); +		case 'A' <= p[i] && p[i] <= 'F': +			v += int(p[i] - 'A' + 10); +		default: +			return 0, false; +		} +	} +	return v, true; +} + +// Unquote unquotes the JSON-quoted string s, +// returning a raw string t.  If s is not a valid +// JSON-quoted string, Unquote returns with ok set to false. +func Unquote(s string) (t string, ok bool) { +	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +		return +	} +	b := make([]byte, len(s)); +	w := 0; +	for r := 1; r < len(s)-1; { +		switch { +		case s[r] == '\\': +			r++; +			if r >= len(s)-1 { +				return +			} +			switch s[r] { +			default: +				return; +			case '"', '\\', '/', '\'': +				b[w] = s[r]; +				r++; +				w++; +			case 'b': +				b[w] = '\b'; +				r++; +				w++; +			case 'f': +				b[w] = '\f'; +				r++; +				w++; +			case 'n': +				b[w] = '\n'; +				r++; +				w++; +			case 'r': +				b[w] = '\r'; +				r++; +				w++; +			case 't': +				b[w] = '\t'; +				r++; +				w++; +			case 'u': +				r++; +				rune, ok := _UnHex(s, r, 4); +				if !ok { +					return +				} +				r += 4; +				w += utf8.EncodeRune(rune, b[w:len(b)]); +			} +		// Control characters are invalid, but we've seen raw \n. +		case s[r] < ' ' && s[r] != '\n': +			if s[r] == '\n' { +				b[w] = '\n'; +				r++; +				w++; +				break; +			} +			return; +		// ASCII +		case s[r] < utf8.RuneSelf: +			b[w] = s[r]; +			r++; +			w++; +		// Coerce to well-formed UTF-8. +		default: +			rune, size := utf8.DecodeRuneInString(s[r:len(s)]); +			r += size; +			w += utf8.EncodeRune(rune, b[w:len(b)]); +		} +	} +	return string(b[0:w]), true +} + +// Quote quotes the raw string s using JSON syntax, +// so that Unquote(Quote(s)) = s, true. +func Quote(s string) string { +	chr := make([]byte, utf8.UTFMax); +	chr0 := chr[0:1]; +	b := new(io.ByteBuffer); +	chr[0] = '"'; +	b.Write(chr0); +	for i := 0; i < len(s); i++ { +		switch { +		case s[i]=='"' || s[i]=='\\': +			chr[0] = '\\'; +			chr[1] = s[i]; +			b.Write(chr[0:2]); + +		case s[i] == '\b': +			chr[0] = '\\'; +			chr[1] = 'b'; +			b.Write(chr[0:2]); + +		case s[i] == '\f': +			chr[0] = '\\'; +			chr[1] = 'f'; +			b.Write(chr[0:2]); + +		case s[i] == '\n': +			chr[0] = '\\'; +			chr[1] = 'n'; +			b.Write(chr[0:2]); + +		case s[i] == '\r': +			chr[0] = '\\'; +			chr[1] = 'r'; +			b.Write(chr[0:2]); + +		case s[i] == '\t': +			chr[0] = '\\'; +			chr[1] = 't'; +			b.Write(chr[0:2]); + +		case 0x20 <= s[i] && s[i] < utf8.RuneSelf: +			chr[0] = s[i]; +			b.Write(chr0); +		} +	} +	chr[0] = '"'; +	b.Write(chr0); +	return string(b.Data()); +} + + +// _Lexer + +type _Lexer struct { +	s string; +	i int; +	kind int; +	token string; +} + +func punct(c byte) bool { +	return c=='"' || c=='[' || c==']' || c==':' || c=='{' || c=='}' || c==',' +} + +func white(c byte) bool { +	return c==' ' || c=='\t' || c=='\n' || c=='\v' +} + +func skipwhite(p string, i int) int { +	for i < len(p) && white(p[i]) { +		i++ +	} +	return i +} + +func skiptoken(p string, i int) int { +	for i < len(p) && !punct(p[i]) && !white(p[i]) { +		i++ +	} +	return i +} + +func skipstring(p string, i int) int { +	for i++; i < len(p) && p[i] != '"'; i++ { +		if p[i] == '\\' { +			i++ +		} +	} +	if i >= len(p) { +		return i +	} +	return i+1 +} + +func (t *_Lexer) Next() { +	i, s := t.i, t.s; +	i = skipwhite(s, i); +	if i >= len(s) { +		t.kind = 0; +		t.token = ""; +		t.i = len(s); +		return; +	} + +	c := s[i]; +	switch { +	case c == '-' || '0' <= c && c <= '9': +		j := skiptoken(s, i); +		t.kind = '1'; +		t.token = s[i:j]; +		i = j; + +	case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': +		j := skiptoken(s, i); +		t.kind = 'a'; +		t.token = s[i:j]; +		i = j; + +	case c == '"': +		j := skipstring(s, i); +		t.kind = '"'; +		t.token = s[i:j]; +		i = j; + +	case c == '[', c == ']', c == ':', c == '{', c == '}', c == ',': +		t.kind = int(c); +		t.token = s[i:i+1]; +		i++; + +	default: +		t.kind = '?'; +		t.token = s[i:i+1]; +	} + +	t.i = i; +} + + +// Parser +// +// Implements parsing but not the actions.  Those are +// carried out by the implementation of the Builder interface. +// A Builder represents the object being created. +// Calling a method like Int64(i) sets that object to i. +// Calling a method like Elem(i) or Key(s) creates a +// new builder for a subpiece of the object (logically, +// an array element or a map key). +// +// There are two Builders, in other files. +// The JsonBuilder builds a generic Json structure +// in which maps are maps. +// The StructBuilder copies data into a possibly +// nested data structure, using the "map keys" +// as struct field names. + +type _Value interface {} + +// BUG(rsc): The json Builder interface needs to be +// reconciled with the xml Builder interface. + +// A Builder is an interface implemented by clients and passed +// to the JSON parser.  It gives clients full control over the +// eventual representation returned by the parser. +type Builder interface { +	// Set value +	Int64(i int64); +	Uint64(i uint64); +	Float64(f float64); +	String(s string); +	Bool(b bool); +	Null(); +	Array(); +	Map(); + +	// Create sub-Builders +	Elem(i int) Builder; +	Key(s string) Builder; +} + +func parse(lex *_Lexer, build Builder) bool { +	ok := false; +Switch: +	switch lex.kind { +	case 0: +		break; +	case '1': +		// If the number is exactly an integer, use that. +		if i, err := strconv.Atoi64(lex.token); err == nil { +			build.Int64(i); +			ok = true; +		} +		else if i, err := strconv.Atoui64(lex.token); err == nil { +			build.Uint64(i); +			ok = true; +		} +		// Fall back to floating point. +		else if f, err := strconv.Atof64(lex.token); err == nil { +			build.Float64(f); +			ok = true; +		} + +	case 'a': +		switch lex.token { +		case "true": +			build.Bool(true); +			ok = true; +		case "false": +			build.Bool(false); +			ok = true; +		case "null": +			build.Null(); +			ok = true; +		} + +	case '"': +		if str, ok1 := Unquote(lex.token); ok1 { +			build.String(str); +			ok = true; +		} + +	case '[': +		// array +		build.Array(); +		lex.Next(); +		n := 0; +		for lex.kind != ']' { +			if n > 0 { +				if lex.kind != ',' { +					break Switch; +				} +				lex.Next(); +			} +			if !parse(lex, build.Elem(n)) { +				break Switch; +			} +			n++; +		} +		ok = true; + +	case '{': +		// map +		lex.Next(); +		build.Map(); +		n := 0; +		for lex.kind != '}' { +			if n > 0 { +				if lex.kind != ',' { +					break Switch; +				} +				lex.Next(); +			} +			if lex.kind != '"' { +				break Switch; +			} +			key, ok := Unquote(lex.token); +			if !ok { +				break Switch; +			} +			lex.Next(); +			if lex.kind != ':' { +				break Switch; +			} +			lex.Next(); +			if !parse(lex, build.Key(key)) { +				break Switch; +			} +			n++; +		} +		ok = true; +	} + +	if ok { +		lex.Next(); +	} +	return ok; +} + +// Parse parses the JSON syntax string s and makes calls to +// the builder to construct a parsed representation. +// On success, it returns with ok set to true. +// On error, it returns with ok set to false, errindx set +// to the byte index in s where a syntax error occurred, +// and errtok set to the offending token. +func Parse(s string, builder Builder) (ok bool, errindx int, errtok string) { +	lex := new(_Lexer); +	lex.s = s; +	lex.Next(); +	if parse(lex, builder) { +		if lex.kind == 0 {	// EOF +			return true, 0, "" +		} +	} +	return false, lex.i, lex.token +} + | 
