json: scanner, Compact, Indent, and tests

This is the first of probably four separate CLs for the new implementation of the json package. The scanner is the core of the new implementation. The other CLs would be the new decoder, the new encoder, and support for JSON streams. R=r CC=golang-dev http://codereview.appspot.com/802051
author: Russ Cox <rsc@golang.org> 2010-04-18 14:45:08 -0700
committer: Russ Cox <rsc@golang.org> 2010-04-18 14:45:08 -0700
commit: 8e87e685b81a225e533d1269c972bd0131971454 (patch)
tree: 84ba9f13642d56d90ace3d90f69e6b366cbef490 /src/pkg/json
parent: 910aad8711d5bfaf09cf75f4eb5cce2b931d8105 (diff)
download: golang-8e87e685b81a225e533d1269c972bd0131971454.tar.gz
4 files changed, 1002 insertions, 0 deletions
diff --git a/src/pkg/json/Makefile b/src/pkg/json/Makefile
index 1f7f9b4e9..fd56f878f 100644
--- a/src/pkg/json/Makefile
+++ b/src/pkg/json/Makefile
@@ -8,7 +8,9 @@ TARG=json
 GOFILES=\
 	decode.go\
 	error.go\
+	indent.go\
 	parse.go\
+	scanner.go\
 	struct.go\
 
 include ../../Make.pkg
diff --git a/src/pkg/json/indent.go b/src/pkg/json/indent.go
new file mode 100644
index 000000000..000da42f6
--- /dev/null
+++ b/src/pkg/json/indent.go
@@ -0,0 +1,116 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"os"
+)
+
+// Compact appends to dst the JSON-encoded src with
+// insignificant space characters elided.
+func Compact(dst *bytes.Buffer, src []byte) os.Error {
+	origLen := dst.Len()
+	var scan scanner
+	scan.reset()
+	start := 0
+	for i, c := range src {
+		v := scan.step(&scan, int(c))
+		if v >= scanSkipSpace {
+			if v == scanError {
+				break
+			}
+			if start < i {
+				dst.Write(src[start:i])
+			}
+			start = i + 1
+		}
+	}
+	if scan.eof() == scanError {
+		dst.Truncate(origLen)
+		return scan.err
+	}
+	if start < len(src) {
+		dst.Write(src[start:])
+	}
+	return nil
+}
+
+func newline(dst *bytes.Buffer, prefix, indent string, depth int) {
+	dst.WriteByte('\n')
+	dst.WriteString(prefix)
+	for i := 0; i < depth; i++ {
+		dst.WriteString(indent)
+	}
+}
+
+// Indent appends to dst an indented form of the JSON-encoded src.
+// Each element in a JSON object or array begins on a new,
+// indented line beginning with prefix followed by one or more
+// copies of indent according to the indentation nesting.
+// The data appended to dst has no trailing newline, to make it easier
+// to embed inside other formatted JSON data.
+func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) os.Error {
+	origLen := dst.Len()
+	var scan scanner
+	scan.reset()
+	needIndent := false
+	depth := 0
+	for _, c := range src {
+		v := scan.step(&scan, int(c))
+		if v == scanSkipSpace {
+			continue
+		}
+		if v == scanError {
+			break
+		}
+		if needIndent && v != scanEndObject && v != scanEndArray {
+			needIndent = false
+			depth++
+			newline(dst, prefix, indent, depth)
+		}
+
+		// Emit semantically uninteresting bytes
+		// (in particular, punctuation in strings) unmodified.
+		if v == scanContinue {
+			dst.WriteByte(c)
+			continue
+		}
+
+		// Add spacing around real punctuation.
+		switch c {
+		case '{', '[':
+			// delay indent so that empty object and array are formatted as {} and [].
+			needIndent = true
+			dst.WriteByte(c)
+
+		case ',':
+			dst.WriteByte(c)
+			newline(dst, prefix, indent, depth)
+
+		case ':':
+			dst.WriteByte(c)
+			dst.WriteByte(' ')
+
+		case '}', ']':
+			if needIndent {
+				// suppress indent in empty object/array
+				needIndent = false
+			} else {
+				depth--
+				newline(dst, prefix, indent, depth)
+			}
+			dst.WriteByte(c)
+
+		default:
+			dst.WriteByte(c)
+		}
+	}
+	if scan.eof() == scanError {
+		dst.Truncate(origLen)
+		return scan.err
+	}
+	return nil
+}
diff --git a/src/pkg/json/scanner.go b/src/pkg/json/scanner.go
new file mode 100644
index 000000000..c622bc679
--- /dev/null
+++ b/src/pkg/json/scanner.go
@@ -0,0 +1,627 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+// JSON value parser state machine.
+// Just about at the limit of what is reasonable to write by hand.
+// Some parts are a bit tedious, but overall it nicely factors out the
+// otherwise common code from the multiple scanning functions
+// in this package (Compact, Indent, checkValid, nextValue, etc).
+//
+// This file starts with two simple examples using the scanner
+// before diving into the scanner itself.
+
+import (
+	"os"
+	"strconv"
+)
+
+// checkValid verifies that data is valid JSON-encoded data.
+// scan is passed in for use by checkValid to avoid an allocation.
+func checkValid(data []byte, scan *scanner) os.Error {
+	scan.reset()
+	for _, c := range data {
+		if scan.step(scan, int(c)) == scanError {
+			return scan.err
+		}
+	}
+	if scan.eof() == scanError {
+		return scan.err
+	}
+	return nil
+}
+
+// nextValue splits data after the next whole JSON value,
+// returning that value and the bytes that follow it as separate slices.
+// scan is passed in for use by nextValue to avoid an allocation.
+func nextValue(data []byte, scan *scanner) (value, rest []byte, err os.Error) {
+	scan.reset()
+	for i, c := range data {
+		v := scan.step(scan, int(c))
+		if v >= scanEnd {
+			switch v {
+			case scanError:
+				return nil, nil, scan.err
+			case scanEnd:
+				return data[0:i], data[i:], nil
+			}
+		}
+	}
+	if scan.eof() == scanError {
+		return nil, nil, scan.err
+	}
+	return data, nil, nil
+}
+
+// A SyntaxError is a description of a JSON syntax error.
+type SyntaxError string
+
+func (e SyntaxError) String() string { return string(e) }
+
+
+// A scanner is a JSON scanning state machine.
+// Callers call scan.reset() and then pass bytes in one at a time
+// by calling scan.step(&scan, c) for each byte.
+// The return value, referred to as an opcode, tells the
+// caller about significant parsing events like beginning
+// and ending literals, objects, and arrays, so that the
+// caller can follow along if it wishes.
+// The return value scanEnd indicates that a single top-level
+// JSON value has been completed, *before* the byte that
+// just got passed in.  (The indication must be delayed in order
+// to recognize the end of numbers: is 123 a whole value or
+// the beginning of 12345e+6?).
+type scanner struct {
+	// The step is a func to be called to execute the next transition.
+	// Also tried using an integer constant and a single func
+	// with a switch, but using the func directly was 10% faster
+	// on a 64-bit Mac Mini, and it's nicer to read.
+	step func(*scanner, int) int
+
+	// Stack of what we're in the middle of - array values, object keys, object values.
+	parseState []int
+
+	// Error that happened, if any.
+	err os.Error
+
+	// 1-byte redo (see undo method)
+	redoCode  int
+	redoState func(*scanner, int) int
+}
+
+// These values are returned by the state transition functions
+// assigned to scanner.state and the method scanner.eof.
+// They give details about the current state of the scan that
+// callers might be interested to know about.
+// It is okay to ignore the return value of any particular
+// call to scanner.state: if one call returns scanError,
+// every subsequent call will return scanError too.
+const (
+	// Continue.
+	scanContinue     = iota // uninteresting byte
+	scanBeginLiteral        // end implied by next result != scanContinue
+	scanBeginObject         // begin object
+	scanObjectKey           // just finished object key (string)
+	scanObjectValue         // just finished non-last object value
+	scanEndObject           // end object (implies scanObjectValue if possible)
+	scanBeginArray          // begin array
+	scanArrayValue          // just finished array value
+	scanEndArray            // end array (implies scanArrayValue if possible)
+	scanSkipSpace           // space byte; can skip; known to be last "continue" result
+
+	// Stop.
+	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
+	scanError // hit an error, scanner.err.
+)
+
+// These values are stored in the parseState stack.
+// They give the current state of a composite value
+// being scanned.  If the parser is inside a nested value
+// the parseState describes the nested state, outermost at entry 0.
+const (
+	parseObjectKey   = iota // parsing object key (before colon)
+	parseObjectValue        // parsing object value (after colon)
+	parseArrayValue         // parsing array value
+)
+
+// reset prepares the scanner for use.
+// It must be called before calling s.step.
+func (s *scanner) reset() {
+	s.step = stateBeginValue
+	s.parseState = s.parseState[0:0]
+	s.err = nil
+}
+
+// eof tells the scanner that the end of input has been reached.
+// It returns a scan status just as s.step does.
+func (s *scanner) eof() int {
+	if s.err != nil {
+		return scanError
+	}
+	if s.step == stateEndTop {
+		return scanEnd
+	}
+	s.step(s, ' ')
+	if s.step == stateEndTop {
+		return scanEnd
+	}
+	if s.err == nil {
+		s.err = SyntaxError("unexpected end of JSON input")
+	}
+	return scanError
+}
+
+// pushParseState pushes a new parse state p onto the parse stack.
+func (s *scanner) pushParseState(p int) {
+	n := len(s.parseState)
+	if n >= cap(s.parseState) {
+		if n == 0 {
+			s.parseState = make([]int, 0, 16)
+		} else {
+			ps := make([]int, n, 2*n)
+			copy(ps, s.parseState)
+			s.parseState = ps
+		}
+	}
+	s.parseState = s.parseState[0 : n+1]
+	s.parseState[n] = p
+}
+
+// popParseState pops a parse state (already obtained) off the stack
+// and updates s.step accordingly.
+func (s *scanner) popParseState() {
+	n := len(s.parseState) - 1
+	s.parseState = s.parseState[0:n]
+	if n == 0 {
+		s.step = stateEndTop
+	} else {
+		s.step = stateEndValue
+	}
+}
+
+// NOTE(rsc): The various instances of
+//
+//	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n')
+//
+// below should all be if c <= ' ' && isSpace(c), but inlining
+// the checks makes a significant difference (>10%) in tight loops
+// such as nextValue.  These should be rewritten with the clearer
+// function call once 6g knows to inline the call.
+
+// stateBeginValueOrEmpty is the state after reading `[`.
+func stateBeginValueOrEmpty(s *scanner, c int) int {
+	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+		return scanSkipSpace
+	}
+	if c == ']' {
+		return stateEndValue(s, c)
+	}
+	return stateBeginValue(s, c)
+}
+
+// stateBeginValue is the state at the beginning of the input.
+func stateBeginValue(s *scanner, c int) int {
+	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+		return scanSkipSpace
+	}
+	switch c {
+	case '{':
+		s.step = stateBeginStringOrEmpty
+		s.pushParseState(parseObjectKey)
+		return scanBeginObject
+	case '[':
+		s.step = stateBeginValueOrEmpty
+		s.pushParseState(parseArrayValue)
+		return scanBeginArray
+	case '"':
+		s.step = stateInString
+		return scanBeginLiteral
+	case '-':
+		s.step = stateNeg
+		return scanBeginLiteral
+	case '0': // beginning of 0.123
+		s.step = state0
+		return scanBeginLiteral
+	case 't': // beginning of true
+		s.step = stateT
+		return scanBeginLiteral
+	case 'f': // beginning of false
+		s.step = stateF
+		return scanBeginLiteral
+	case 'n': // beginning of null
+		s.step = stateN
+		return scanBeginLiteral
+	}
+	if '1' <= c && c <= '9' { // beginning of 1234.5
+		s.step = state1
+		return scanBeginLiteral
+	}
+	return s.error(c, "looking for beginning of value")
+}
+
+// stateBeginStringOrEmpty is the state after reading `{`.
+func stateBeginStringOrEmpty(s *scanner, c int) int {
+	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+		return scanSkipSpace
+	}
+	if c == '}' {
+		return stateEndValue(s, c)
+	}
+	return stateBeginString(s, c)
+}
+
+// stateBeginString is the state after reading `{"key": value,`.
+func stateBeginString(s *scanner, c int) int {
+	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+		return scanSkipSpace
+	}
+	if c == '"' {
+		s.step = stateInString
+		return scanBeginLiteral
+	}
+	return s.error(c, "looking for beginning of object key string")
+}
+
+// stateEndValue is the state after completing a value,
+// such as after reading `{}` or `true` or `["x"`.
+func stateEndValue(s *scanner, c int) int {
+	n := len(s.parseState)
+	if n == 0 {
+		// Completed top-level before the current byte.
+		s.step = stateEndTop
+		return stateEndTop(s, c)
+	}
+	if c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+		s.step = stateEndValue
+		return scanSkipSpace
+	}
+	ps := s.parseState[n-1]
+	switch ps {
+	case parseObjectKey:
+		if c == ':' {
+			s.parseState[n-1] = parseObjectValue
+			s.step = stateBeginValue
+			return scanObjectKey
+		}
+		if c == '}' {
+			s.popParseState()
+			return scanEndObject
+		}
+		return s.error(c, "after object key")
+	case parseObjectValue:
+		if c == ',' {
+			s.parseState[n-1] = parseObjectKey
+			s.step = stateBeginString
+			return scanObjectValue
+		}
+		if c == '}' {
+			s.popParseState()
+			return scanEndObject
+		}
+		return s.error(c, "after object key:value pair")
+	case parseArrayValue:
+		if c == ',' {
+			s.step = stateBeginValue
+			return scanArrayValue
+		}
+		if c == ']' {
+			s.popParseState()
+			return scanEndArray
+		}
+		return s.error(c, "after array element")
+	}
+	return s.error(c, "")
+}
+
+// stateEndTop is the state after finishing the top-level value,
+// such as after reading `{}` or `[1,2,3]`.
+// Only space characters should be seen now.
+func stateEndTop(s *scanner, c int) int {
+	if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
+		// Complain about non-space byte on next call.
+		s.error(c, "after top-level value")
+	}
+	return scanEnd
+}
+
+// stateInString is the state after reading `"`.
+func stateInString(s *scanner, c int) int {
+	if c == '"' {
+		s.step = stateEndValue
+		return scanContinue
+	}
+	if c == '\\' {
+		s.step = stateInStringEsc
+		return scanContinue
+	}
+	if c < 0x20 {
+		return s.error(c, "in string literal")
+	}
+	return scanContinue
+}
+
+// stateInStringEsc is the state after reading `"\` during a quoted string.
+func stateInStringEsc(s *scanner, c int) int {
+	switch c {
+	case 'b', 'f', 'n', 'r', 't', '\\', '"':
+		s.step = stateInString
+		return scanContinue
+	}
+	if c == 'u' {
+		s.step = stateInStringEscU
+		return scanContinue
+	}
+	return s.error(c, "in string escape code")
+}
+
+// stateInStringEscU is the state after reading `"\u` during a quoted string.
+func stateInStringEscU(s *scanner, c int) int {
+	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+		s.step = stateInStringEscU1
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.
+func stateInStringEscU1(s *scanner, c int) int {
+	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+		s.step = stateInStringEscU12
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.
+func stateInStringEscU12(s *scanner, c int) int {
+	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+		s.step = stateInStringEscU123
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
+func stateInStringEscU123(s *scanner, c int) int {
+	if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
+		s.step = stateInString
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU123 is the state after reading `-` during a number.
+func stateNeg(s *scanner, c int) int {
+	if c == '0' {
+		s.step = state0
+		return scanContinue
+	}
+	if '1' <= c && c <= '9' {
+		s.step = state1
+		return scanContinue
+	}
+	return s.error(c, "in numeric literal")
+}
+
+// state1 is the state after reading a non-zero integer during a number,
+// such as after reading `1` or `100` but not `0`.
+func state1(s *scanner, c int) int {
+	if '0' <= c && c <= '9' {
+		s.step = state1
+		return scanContinue
+	}
+	return state0(s, c)
+}
+
+// state0 is the state after reading `0` during a number.
+func state0(s *scanner, c int) int {
+	if c == '.' {
+		s.step = stateDot
+		return scanContinue
+	}
+	if c == 'e' {
+		s.step = stateE
+		return scanContinue
+	}
+	return stateEndValue(s, c)
+}
+
+// stateDot is the state after reading the integer and decimal point in a number,
+// such as after reading `1.`.
+func stateDot(s *scanner, c int) int {
+	if '0' <= c && c <= '9' {
+		s.step = stateDot0
+		return scanContinue
+	}
+	return s.error(c, "after decimal point in numeric literal")
+}
+
+// stateDot0 is the state after reading the integer, decimal point, and subsequent
+// digits of a number, such as after reading `3.14`.
+func stateDot0(s *scanner, c int) int {
+	if '0' <= c && c <= '9' {
+		s.step = stateDot0
+		return scanContinue
+	}
+	if c == 'e' {
+		s.step = stateE
+		return scanContinue
+	}
+	return stateEndValue(s, c)
+}
+
+// stateE is the state after reading the mantissa and e in a number,
+// such as after reading `314e` or `0.314e`.
+func stateE(s *scanner, c int) int {
+	if c == '+' {
+		s.step = stateESign
+		return scanContinue
+	}
+	if c == '-' {
+		s.step = stateESign
+		return scanContinue
+	}
+	return stateESign(s, c)
+}
+
+// stateESign is the state after reading the mantissa, e, and sign in a number,
+// such as after reading `314e-` or `0.314e+`.
+func stateESign(s *scanner, c int) int {
+	if '0' <= c && c <= '9' {
+		s.step = stateE0
+		return scanContinue
+	}
+	return s.error(c, "in exponent of numeric literal")
+}
+
+// stateE0 is the state after reading the mantissa, e, optional sign,
+// and at least one digit of the exponent in a number,
+// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.
+func stateE0(s *scanner, c int) int {
+	if '0' <= c && c <= '9' {
+		s.step = stateE0
+		return scanContinue
+	}
+	return stateEndValue(s, c)
+}
+
+// stateT is the state after reading `t`.
+func stateT(s *scanner, c int) int {
+	if c == 'r' {
+		s.step = stateTr
+		return scanContinue
+	}
+	return s.error(c, "in literal true (expecting 'r')")
+}
+
+// stateTr is the state after reading `tr`.
+func stateTr(s *scanner, c int) int {
+	if c == 'u' {
+		s.step = stateTru
+		return scanContinue
+	}
+	return s.error(c, "in literal true (expecting 'u')")
+}
+
+// stateTru is the state after reading `tru`.
+func stateTru(s *scanner, c int) int {
+	if c == 'e' {
+		s.step = stateEndValue
+		return scanContinue
+	}
+	return s.error(c, "in literal true (expecting 'e')")
+}
+
+// stateF is the state after reading `f`.
+func stateF(s *scanner, c int) int {
+	if c == 'a' {
+		s.step = stateFa
+		return scanContinue
+	}
+	return s.error(c, "in literal false (expecting 'a')")
+}
+
+// stateFa is the state after reading `fa`.
+func stateFa(s *scanner, c int) int {
+	if c == 'l' {
+		s.step = stateFal
+		return scanContinue
+	}
+	return s.error(c, "in literal false (expecting 'l')")
+}
+
+// stateFal is the state after reading `fal`.
+func stateFal(s *scanner, c int) int {
+	if c == 's' {
+		s.step = stateFals
+		return scanContinue
+	}
+	return s.error(c, "in literal false (expecting 's')")
+}
+
+// stateFals is the state after reading `fals`.
+func stateFals(s *scanner, c int) int {
+	if c == 'e' {
+		s.step = stateEndValue
+		return scanContinue
+	}
+	return s.error(c, "in literal false (expecting 'e')")
+}
+
+// stateN is the state after reading `n`.
+func stateN(s *scanner, c int) int {
+	if c == 'u' {
+		s.step = stateNu
+		return scanContinue
+	}
+	return s.error(c, "in literal null (expecting 'u')")
+}
+
+// stateNu is the state after reading `nu`.
+func stateNu(s *scanner, c int) int {
+	if c == 'l' {
+		s.step = stateNul
+		return scanContinue
+	}
+	return s.error(c, "in literal null (expecting 'l')")
+}
+
+// stateNul is the state after reading `nul`.
+func stateNul(s *scanner, c int) int {
+	if c == 'l' {
+		s.step = stateEndValue
+		return scanContinue
+	}
+	return s.error(c, "in literal null (expecting 'l')")
+}
+
+// stateError is the state after reaching a syntax error,
+// such as after reading `[1}` or `5.1.2`.
+func stateError(s *scanner, c int) int {
+	return scanError
+}
+
+// error records an error and switches to the error state.
+func (s *scanner) error(c int, context string) int {
+	s.step = stateError
+	s.err = SyntaxError("invalid character '" + quoteChar(c) + "' " + context)
+	return scanError
+}
+
+// quoteChar formats c as a quoted character literal
+func quoteChar(c int) string {
+	// special cases - different from quoted strings
+	if c == '\'' {
+		return `'\''`
+	}
+	if c == '"' {
+		return `'"'`
+	}
+
+	// use quoted string with different quotation marks
+	s := strconv.Quote(string(c))
+	return "'" + s[1:len(s)-1] + "'"
+}
+
+// undo causes the scanner to return scanCode from the next state transition.
+// This gives callers a simple 1-byte undo mechanism.
+func (s *scanner) undo(scanCode int) {
+	if s.step == stateRedo {
+		panic("invalid use of scanner")
+	}
+	s.redoCode = scanCode
+	s.redoState = s.step
+	s.step = stateRedo
+}
+
+// stateRedo helps implement the scanner's 1-byte undo.
+func stateRedo(s *scanner, c int) int {
+	s.step = s.redoState
+	return s.redoCode
+}
diff --git a/src/pkg/json/scanner_test.go b/src/pkg/json/scanner_test.go
new file mode 100644
index 000000000..592960482
--- /dev/null
+++ b/src/pkg/json/scanner_test.go
@@ -0,0 +1,257 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+	"bytes"
+	"math"
+	"rand"
+	"testing"
+)
+
+// Tests of simple examples.
+
+type example struct {
+	compact string
+	indent  string
+}
+
+var examples = []example{
+	example{`1`, `1`},
+	example{`{}`, `{}`},
+	example{`[]`, `[]`},
+	example{`{"":2}`, "{\n\t\"\": 2\n}"},
+	example{`[3]`, "[\n\t3\n]"},
+	example{`[1,2,3]`, "[\n\t1,\n\t2,\n\t3\n]"},
+	example{`{"x":1}`, "{\n\t\"x\": 1\n}"},
+	example{ex1, ex1i},
+}
+
+var ex1 = `[true,false,null,"x",1,1.5,0,-5e+2]`
+
+var ex1i = `[
+	true,
+	false,
+	null,
+	"x",
+	1,
+	1.5,
+	0,
+	-5e+2
+]`
+
+func TestCompact(t *testing.T) {
+	var buf bytes.Buffer
+	for _, tt := range examples {
+		buf.Reset()
+		if err := Compact(&buf, []byte(tt.compact)); err != nil {
+			t.Errorf("Compact(%#q): %v", tt.compact, err)
+		} else if s := buf.String(); s != tt.compact {
+			t.Errorf("Compact(%#q) = %#q, want original", tt.compact, s)
+		}
+
+		buf.Reset()
+		if err := Compact(&buf, []byte(tt.indent)); err != nil {
+			t.Errorf("Compact(%#q): %v", tt.indent, err)
+			continue
+		} else if s := buf.String(); s != tt.compact {
+			t.Errorf("Compact(%#q) = %#q, want %#q", tt.indent, s, tt.compact)
+		}
+	}
+}
+
+func TestIndent(t *testing.T) {
+	var buf bytes.Buffer
+	for _, tt := range examples {
+		buf.Reset()
+		if err := Indent(&buf, []byte(tt.indent), "", "\t"); err != nil {
+			t.Errorf("Indent(%#q): %v", tt.indent, err)
+		} else if s := buf.String(); s != tt.indent {
+			t.Errorf("Indent(%#q) = %#q, want original", tt.indent, s)
+		}
+
+		buf.Reset()
+		if err := Indent(&buf, []byte(tt.compact), "", "\t"); err != nil {
+			t.Errorf("Indent(%#q): %v", tt.compact, err)
+			continue
+		} else if s := buf.String(); s != tt.indent {
+			t.Errorf("Indent(%#q) = %#q, want %#q", tt.compact, s, tt.indent)
+		}
+	}
+}
+
+// Tests of a large random structure.
+
+func TestCompactBig(t *testing.T) {
+	var buf bytes.Buffer
+	if err := Compact(&buf, jsonBig); err != nil {
+		t.Fatalf("Compact: %v", err)
+	}
+	b := buf.Bytes()
+	if bytes.Compare(b, jsonBig) != 0 {
+		t.Error("Compact(jsonBig) != jsonBig")
+		diff(t, b, jsonBig)
+		return
+	}
+}
+
+func TestIndentBig(t *testing.T) {
+	var buf bytes.Buffer
+	if err := Indent(&buf, jsonBig, "", "\t"); err != nil {
+		t.Fatalf("Indent1: %v", err)
+	}
+	b := buf.Bytes()
+	if len(b) == len(jsonBig) {
+		// jsonBig is compact (no unnecessary spaces);
+		// indenting should make it bigger
+		t.Fatalf("Indent(jsonBig) did not get bigger")
+	}
+
+	// should be idempotent
+	var buf1 bytes.Buffer
+	if err := Indent(&buf1, b, "", "\t"); err != nil {
+		t.Fatalf("Indent2: %v", err)
+	}
+	b1 := buf1.Bytes()
+	if bytes.Compare(b1, b) != 0 {
+		t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)")
+		diff(t, b1, b)
+		return
+	}
+
+	// should get back to original
+	buf1.Reset()
+	if err := Compact(&buf1, b); err != nil {
+		t.Fatalf("Compact: %v", err)
+	}
+	b1 = buf1.Bytes()
+	if bytes.Compare(b1, jsonBig) != 0 {
+		t.Error("Compact(Indent(jsonBig)) != jsonBig")
+		diff(t, b1, jsonBig)
+		return
+	}
+}
+
+func TestNextValueBig(t *testing.T) {
+	var scan scanner
+	item, rest, err := nextValue(jsonBig, &scan)
+	if err != nil {
+		t.Fatalf("nextValue: ", err)
+	}
+	if len(item) != len(jsonBig) || &item[0] != &jsonBig[0] {
+		t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
+	}
+	if len(rest) != 0 {
+		t.Errorf("invalid rest: %d", len(rest))
+	}
+
+	item, rest, err = nextValue(bytes.Add(jsonBig, []byte("HELLO WORLD")), &scan)
+	if err != nil {
+		t.Fatalf("nextValue extra: ", err)
+	}
+	if len(item) != len(jsonBig) {
+		t.Errorf("invalid item: %d %d", len(item), len(jsonBig))
+	}
+	if string(rest) != "HELLO WORLD" {
+		t.Errorf("invalid rest: %d", len(rest))
+	}
+}
+
+func BenchmarkSkipValue(b *testing.B) {
+	var scan scanner
+	for i := 0; i < b.N; i++ {
+		nextValue(jsonBig, &scan)
+	}
+	b.SetBytes(int64(len(jsonBig)))
+}
+
+func diff(t *testing.T, a, b []byte) {
+	for i := 0; ; i++ {
+		if i >= len(a) || i >= len(b) || a[i] != b[i] {
+			j := i - 10
+			if j < 0 {
+				j = 0
+			}
+			t.Errorf("diverge at %d: «%s» vs «%s»", i, trim(a[j:]), trim(b[j:]))
+		}
+	}
+}
+
+func trim(b []byte) []byte {
+	if len(b) > 20 {
+		return b[0:20]
+	}
+	return b
+}
+
+// Generate a random JSON object.
+
+var jsonBig []byte
+
+func init() {
+	var buf bytes.Buffer
+	Marshal(&buf, genValue(100000))
+	jsonBig = buf.Bytes()
+}
+
+func genValue(n int) interface{} {
+	if n > 1 {
+		switch rand.Intn(2) {
+		case 0:
+			return genArray(n)
+		case 1:
+			return genMap(n)
+		}
+	}
+	switch rand.Intn(3) {
+	case 0:
+		return rand.Intn(2) == 0
+	case 1:
+		return rand.NormFloat64()
+	case 2:
+		return genString(30)
+	}
+	panic("unreachable")
+}
+
+func genString(stddev float64) string {
+	n := int(math.Fabs(rand.NormFloat64()*stddev + stddev/2))
+	c := make([]int, n)
+	for i := range c {
+		f := math.Fabs(rand.NormFloat64()*64 + 32)
+		if f > 0x10ffff {
+			f = 0x10ffff
+		}
+		c[i] = int(f)
+	}
+	return string(c)
+}
+
+func genArray(n int) []interface{} {
+	f := int(math.Fabs(rand.NormFloat64()) * math.Fmin(10, float64(n/2)))
+	if f > n {
+		f = n
+	}
+	x := make([]interface{}, int(f))
+	for i := range x {
+		x[i] = genValue(((i+1)*n)/f - (i*n)/f)
+	}
+	return x
+}
+
+func genMap(n int) map[string]interface{} {
+	f := int(math.Fabs(rand.NormFloat64()) * math.Fmin(10, float64(n/2)))
+	if f > n {
+		f = n
+	}
+	if n > 0 && f == 0 {
+		f = 1
+	}
+	x := make(map[string]interface{})
+	for i := 0; i < f; i++ {
+		x[genString(10)] = genValue(((i+1)*n)/f - (i*n)/f)
+	}
+	return x
+}
author	Russ Cox <rsc@golang.org>	2010-04-18 14:45:08 -0700
committer	Russ Cox <rsc@golang.org>	2010-04-18 14:45:08 -0700
commit	8e87e685b81a225e533d1269c972bd0131971454 (patch)
tree	84ba9f13642d56d90ace3d90f69e6b366cbef490 /src/pkg/json
parent	910aad8711d5bfaf09cf75f4eb5cce2b931d8105 (diff)
download	golang-8e87e685b81a225e533d1269c972bd0131971454.tar.gz