diff options
Diffstat (limited to 'src/pkg/net/textproto/reader.go')
-rw-r--r-- | src/pkg/net/textproto/reader.go | 129 |
1 files changed, 104 insertions, 25 deletions
diff --git a/src/pkg/net/textproto/reader.go b/src/pkg/net/textproto/reader.go index 125feb3e8..b61bea862 100644 --- a/src/pkg/net/textproto/reader.go +++ b/src/pkg/net/textproto/reader.go @@ -128,6 +128,17 @@ func (r *Reader) readContinuedLineSlice() ([]byte, error) { return line, nil } + // Optimistically assume that we have started to buffer the next line + // and it starts with an ASCII letter (the next header key), so we can + // avoid copying that buffered data around in memory and skipping over + // non-existent whitespace. + if r.R.Buffered() > 1 { + peek, err := r.R.Peek(1) + if err == nil && isASCIILetter(peek[0]) { + return trim(line), nil + } + } + // ReadByte or the next readLineSlice will flush the read buffer; // copy the slice into buf. r.buf = append(r.buf[:0], trim(line)...) @@ -445,23 +456,25 @@ func (r *Reader) ReadDotLines() ([]string, error) { // } // func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { - m := make(MIMEHeader) + m := make(MIMEHeader, 4) for { kv, err := r.readContinuedLineSlice() if len(kv) == 0 { return m, err } - // Key ends at first colon; must not have spaces. + // Key ends at first colon; should not have spaces but + // they appear in the wild, violating specs, so we + // remove them if present. i := bytes.IndexByte(kv, ':') if i < 0 { return m, ProtocolError("malformed MIME header line: " + string(kv)) } - key := string(kv[0:i]) - if strings.Index(key, " ") >= 0 { - key = strings.TrimRight(key, " ") + endKey := i + for endKey > 0 && kv[endKey-1] == ' ' { + endKey-- } - key = CanonicalMIMEHeaderKey(key) + key := canonicalMIMEHeaderKey(kv[:endKey]) // Skip initial spaces in value. i++ // skip colon @@ -484,41 +497,107 @@ func (r *Reader) ReadMIMEHeader() (MIMEHeader, error) { // letter and any letter following a hyphen to upper case; // the rest are converted to lowercase. For example, the // canonical key for "accept-encoding" is "Accept-Encoding". +// MIME header keys are assumed to be ASCII only. 
func CanonicalMIMEHeaderKey(s string) string {
	// Quick check for canonical encoding: scan once and hand s back
	// untouched (no allocation) when every letter already has the right
	// case. upper reports whether the current byte starts a word, i.e.
	// it is the first byte or directly follows a '-'.
	//
	// Note that a space by itself never triggers the rewrite; it is only
	// turned into '-' when some letter in s also has the wrong case.
	upper := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		if upper && 'a' <= c && c <= 'z' {
			return canonicalMIMEHeaderKey([]byte(s))
		}
		if !upper && 'A' <= c && c <= 'Z' {
			return canonicalMIMEHeaderKey([]byte(s))
		}
		upper = c == '-'
	}
	return s
}

// toLower is the distance between an ASCII upper case letter and its
// lower case form; add it to lower a letter, subtract it to raise one.
const toLower = 'a' - 'A'

// canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
// allowed to mutate the provided byte slice before returning the
// string.
//
// While canonicalizing a in place it also looks the key up in
// commonHeaders, so that the common case returns a shared string
// instead of allocating a new one. If the key is not found, a has
// been canonicalized, so string(a) is returned.
func canonicalMIMEHeaderKey(a []byte) string {
	// [lo, hi) is the window of commonHeaders entries whose first i
	// bytes equal the canonicalized a[:i]. It starts as the whole table
	// and shrinks by one byte position per iteration.
	upper := true
	lo := 0
	hi := len(commonHeaders)
	for i := 0; i < len(a); i++ {
		// Canonicalize: first letter upper case
		// and upper case after each dash.
		// (Host, User-Agent, If-Modified-Since).
		// MIME headers are ASCII only, so no Unicode issues.
		c := a[i]
		switch {
		case c == ' ':
			// Spaces are rewritten to dashes. The byte must still go
			// through the narrowing step below: skipping it would leave
			// entries in the window that disagree with a at index i,
			// which yields wrong matches and can index past the end of
			// a short entry.
			c = '-'
		case upper && 'a' <= c && c <= 'z':
			c -= toLower
		case !upper && 'A' <= c && c <= 'Z':
			c += toLower
		}
		a[i] = c
		upper = c == '-' // for next time

		// Drop entries from the front that are too short to have byte i
		// or that sort before c at byte i.
		for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
			lo++
		}
		// Drop entries from the back that sort after c at byte i.
		// Indexing is safe: every remaining entry shares the prefix
		// a[:i] with entry lo and sorts after it, so it is longer than i.
		for hi > lo && commonHeaders[hi-1][i] > c {
			hi--
		}
	}
	// Every entry left in the window has a as a prefix; the first one is
	// the shortest, so it is an exact match iff the lengths agree.
	if lo < hi && len(commonHeaders[lo]) == len(a) {
		return commonHeaders[lo]
	}
	return string(a)
}

// commonHeaders lists frequently seen header keys in canonical form.
// canonicalMIMEHeaderKey narrows a window over this slice byte by byte,
// so the entries must stay sorted in ascending byte order and must not
// contain duplicates.
var commonHeaders = []string{
	"Accept",
	"Accept-Charset",
	"Accept-Encoding",
	"Accept-Language",
	"Accept-Ranges",
	"Cache-Control",
	"Cc",
	"Connection",
	"Content-Id",
	"Content-Language",
	"Content-Length",
	"Content-Transfer-Encoding",
	"Content-Type",
	"Date",
	"Dkim-Signature",
	"Etag",
	"Expires",
	"From",
	"Host",
	"If-Modified-Since",
	"If-None-Match",
	"In-Reply-To",
	"Last-Modified",
	"Location",
	"Message-Id",
	"Mime-Version",
	"Pragma",
	"Received",
	"Return-Path",
	"Server",
	"Set-Cookie",
	"Subject",
	"To",
	"User-Agent",
	"Via",
	"X-Forwarded-For",
	"X-Imforwards",
	"X-Powered-By",
}