summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetar Maymounkov <petarm@gmail.com>2010-01-28 15:14:54 -0800
committerPetar Maymounkov <petarm@gmail.com>2010-01-28 15:14:54 -0800
commitcffa5c3529a4df2e00cc353b78b151af2412293f (patch)
treed4cfac45bf00165fd1570d1687fd99aa2ca1c69c
parentc04ab7b3f78f5fb138c5a9ed0ae3cb2e5e99d4f9 (diff)
downloadgolang-cffa5c3529a4df2e00cc353b78b151af2412293f.tar.gz
http: add lexing functions
In particular, add field-value tokenizer which respects quoting rules. The code is intended for use in tokenizing the Transfer-Encoding and Trailer fields. The lexing function is not connected to the main parsing code yet (in the next CL). R=rsc CC=golang-dev http://codereview.appspot.com/190085 Committer: Russ Cox <rsc@golang.org>
-rw-r--r--src/pkg/http/Makefile1
-rw-r--r--src/pkg/http/lex.go152
-rw-r--r--src/pkg/http/lex_test.go70
3 files changed, 223 insertions, 0 deletions
diff --git a/src/pkg/http/Makefile b/src/pkg/http/Makefile
index 7654de807..8a4562122 100644
--- a/src/pkg/http/Makefile
+++ b/src/pkg/http/Makefile
@@ -9,6 +9,7 @@ GOFILES=\
chunked.go\
client.go\
fs.go\
+ lex.go\
request.go\
response.go\
server.go\
diff --git a/src/pkg/http/lex.go b/src/pkg/http/lex.go
new file mode 100644
index 000000000..46e051957
--- /dev/null
+++ b/src/pkg/http/lex.go
@@ -0,0 +1,152 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+ "strings"
+)
+
+// This file deals with lexical matters of HTTP
+
+func isSeparator(c byte) bool {
+ switch c {
+ case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t':
+ return true
+ default:
+ return false
+ }
+ panic()
+}
+
+func isSpace(c byte) bool {
+ switch c {
+ case ' ', '\t', '\r', '\n':
+ return true
+ default:
+ return false
+ }
+ panic()
+}
+
+func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 }
+
+func isChar(c byte) bool { return 0 <= c && c <= 127 }
+
+func isAnyText(c byte) bool { return !isCtl(c) }
+
+func isQdText(c byte) bool { return isAnyText(c) && c != '"' }
+
+func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) }
+
+// Valid escaped sequences are not specified in RFC 2616, so for now, we assume
+// that they coincide with the common sense ones used by GO. Malformed
+// characters should probably not be treated as errors by a robust (forgiving)
+// parser, so we replace them with the '?' character.
+func httpUnquotePair(b byte) byte {
+ // skip the first byte, which should always be '\'
+ switch b {
+ case 'a':
+ return '\a'
+ case 'b':
+ return '\b'
+ case 'f':
+ return '\f'
+ case 'n':
+ return '\n'
+ case 'r':
+ return '\r'
+ case 't':
+ return '\t'
+ case 'v':
+ return '\v'
+ case '\\':
+ return '\\'
+ case '\'':
+ return '\''
+ case '"':
+ return '"'
+ }
+ return '?'
+}
+
+// raw must begin with a valid quoted string. Only the first quoted string is
+// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1
+// upon failure.
+func httpUnquote(raw []byte) (eaten int, result string) {
+ buf := make([]byte, len(raw))
+ if raw[0] != '"' {
+ return -1, ""
+ }
+ eaten = 1
+ j := 0 // # of bytes written in buf
+ for i := 1; i < len(raw); i++ {
+ switch b := raw[i]; b {
+ case '"':
+ eaten++
+ buf = buf[0:j]
+ return i + 1, string(buf)
+ case '\\':
+ if len(raw) < i+2 {
+ return -1, ""
+ }
+ buf[j] = httpUnquotePair(raw[i+1])
+ eaten += 2
+ j++
+ i++
+ default:
+ if isQdText(b) {
+ buf[j] = b
+ } else {
+ buf[j] = '?'
+ }
+ eaten++
+ j++
+ }
+ }
+ return -1, ""
+}
+
+// This is a best effort parse, so errors are not returned, instead not all of
+// the input string might be parsed. result is always non-nil.
+func httpSplitFieldValue(fv string) (eaten int, result []string) {
+ result = make([]string, 0, len(fv))
+ raw := strings.Bytes(fv)
+ i := 0
+ chunk := ""
+ for i < len(raw) {
+ b := raw[i]
+ switch {
+ case b == '"':
+ eaten, unq := httpUnquote(raw[i:len(raw)])
+ if eaten < 0 {
+ return i, result
+ } else {
+ i += eaten
+ chunk += unq
+ }
+ case isSeparator(b):
+ if chunk != "" {
+ result = result[0 : len(result)+1]
+ result[len(result)-1] = chunk
+ chunk = ""
+ }
+ i++
+ case isToken(b):
+ chunk += string(b)
+ i++
+ case b == '\n' || b == '\r':
+ i++
+ default:
+ chunk += "?"
+ i++
+ }
+ }
+ if chunk != "" {
+ result = result[0 : len(result)+1]
+ result[len(result)-1] = chunk
+ chunk = ""
+ }
+ return i, result
+}
diff --git a/src/pkg/http/lex_test.go b/src/pkg/http/lex_test.go
new file mode 100644
index 000000000..a67070f5e
--- /dev/null
+++ b/src/pkg/http/lex_test.go
@@ -0,0 +1,70 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http
+
+import (
+ "testing"
+)
+
+type lexTest struct {
+ Raw string
+ Parsed int // # of parsed characters
+ Result []string
+}
+
+var lexTests = []lexTest{
+ lexTest{
+ Raw: `"abc"def,:ghi`,
+ Parsed: 13,
+ Result: []string{"abcdef", "ghi"},
+ },
+ // My understanding of the RFC is that escape sequences outside of
+ // quotes are not interpreted?
+ lexTest{
+ Raw: `"\t"\t"\t"`,
+ Parsed: 10,
+ Result: []string{"\t", "t\t"},
+ },
+ lexTest{
+ Raw: `"\yab"\r\n`,
+ Parsed: 10,
+ Result: []string{"?ab", "r", "n"},
+ },
+ lexTest{
+ Raw: "ab\f",
+ Parsed: 3,
+ Result: []string{"ab?"},
+ },
+ lexTest{
+ Raw: "\"ab \" c,de f, gh, ij\n\t\r",
+ Parsed: 23,
+ Result: []string{"ab ", "c", "de", "f", "gh", "ij"},
+ },
+}
+
+func min(x, y int) int {
+ if x <= y {
+ return x
+ }
+ return y
+}
+
+func TestSplitFieldValue(t *testing.T) {
+ for k, l := range lexTests {
+ parsed, result := httpSplitFieldValue(l.Raw)
+ if parsed != l.Parsed {
+ t.Errorf("#%d: Parsed %d, expected %d", k, parsed, l.Parsed)
+ }
+ if len(result) != len(l.Result) {
+ t.Errorf("#%d: Result len %d, expected %d", k, len(result), len(l.Result))
+ }
+ for i := 0; i < min(len(result), len(l.Result)); i++ {
+ if result[i] != l.Result[i] {
+ t.Errorf("#%d: %d-th entry mismatch. Have {%s}, expect {%s}",
+ k, i, result[i], l.Result[i])
+ }
+ }
+ }
+}