summaryrefslogtreecommitdiff
path: root/src/pkg/http/request.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/http/request.go')
-rw-r--r--src/pkg/http/request.go413
1 files changed, 413 insertions, 0 deletions
diff --git a/src/pkg/http/request.go b/src/pkg/http/request.go
new file mode 100644
index 000000000..76dd6f30c
--- /dev/null
+++ b/src/pkg/http/request.go
@@ -0,0 +1,413 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// HTTP Request reading and parsing.
+
+// The http package implements parsing of HTTP requests and URLs
+// and provides an extensible HTTP server.
+//
+// In the future it should also implement parsing of HTTP replies
+// and provide methods to fetch URLs via HTTP.
+package http
+
+import (
+ "bufio";
+ "fmt";
+ "http";
+ "io";
+ "os";
+ "strconv";
+ "strings";
+)
+
+const (
+ maxLineLength = 1024; // assumed < bufio.DefaultBufSize; readLineBytes rejects raw lines of this length or longer
+ maxValueLength = 1024; // readKeyValue rejects a continued header value of this length or longer
+ maxHeaderLines = 1024; // ReadRequest rejects a request once its header line count reaches this
+)
+
+// ProtocolError is the type of the HTTP request parsing errors declared below.
+type ProtocolError struct {
+ os.ErrorString
+}
+var (
+ LineTooLong = &ProtocolError{"http header line too long"}; // returned by readLineBytes
+ ValueTooLong = &ProtocolError{"http header value too long"}; // returned by readKeyValue
+ HeaderTooLong = &ProtocolError{"http header too long"}; // returned by ReadRequest
+ BadContentLength = &ProtocolError{"invalid content length"}; // returned by ReadRequest
+ ShortEntityBody = &ProtocolError{"entity body too short"}; // returned by ReadRequest
+ BadHeader = &ProtocolError{"malformed http header"}; // returned by readKeyValue
+ BadRequest = &ProtocolError{"invalid http request"}; // returned by ReadRequest
+ BadHTTPVersion = &ProtocolError{"unsupported http version"}; // returned by ReadRequest
+)
+
+// A Request represents a parsed HTTP request header.
+type Request struct {
+ Method string; // GET, POST, PUT, etc.
+ RawUrl string; // The raw URL given in the request.
+ Url *URL; // Parsed URL.
+ Proto string; // Protocol version string, e.g. "HTTP/1.0"
+ ProtoMajor int; // Major protocol version, e.g. 1
+ ProtoMinor int; // Minor protocol version, e.g. 0
+
+ // A header mapping request lines to their values.
+ // If the header says
+ //
+ // Accept-Language: en-us
+ // accept-encoding: gzip, deflate
+ // Connection: keep-alive
+ //
+ // then
+ //
+ // Header = map[string]string{
+ // "Accept-Encoding": "gzip, deflate",
+ // "Accept-Language": "en-us",
+ // "Connection": "keep-alive",
+ // }
+ //
+ // HTTP defines that header names are case-insensitive.
+ // The request parser implements this by canonicalizing the
+ // name, making the first character and any characters
+ // following a hyphen uppercase and the rest lowercase.
+ Header map[string] string;
+
+ // The message body.
+ Body io.Reader;
+
+ // Whether to close the connection after replying to this request.
+ Close bool;
+
+ // The host on which the URL is sought.
+ // Per RFC 2616, this is either the value of the Host: header
+ // or the host name given in the URL itself.
+ Host string;
+
+ // The referring URL, if sent in the request.
+ //
+ // Referer is misspelled as in the request itself,
+ // a mistake from the earliest days of HTTP.
+ // This value can also be fetched from the Header map
+ // as Header["Referer"]; the benefit of making it
+ // available as a structure field is that the compiler
+ // can diagnose programs that use the alternate
+ // (correct English) spelling req.Referrer but cannot
+ // diagnose programs that use Header["Referrer"].
+ Referer string;
+
+ // The User-Agent: header string, if sent in the request.
+ UserAgent string;
+}
+
+// ProtoAtLeast reports whether the HTTP protocol version of the
+// request is major.minor or later.
+func (r *Request) ProtoAtLeast(major, minor int) bool {
+	if r.ProtoMajor != major {
+		// The major versions differ, so they alone decide the answer.
+		return r.ProtoMajor > major
+	}
+	return r.ProtoMinor >= minor
+}
+
+// Read a line of bytes (up to \n) from b.
+// Give up if the line exceeds maxLineLength.
+// The returned bytes are a pointer into storage in
+// the bufio, so they are only valid until the next bufio read.
+func readLineBytes(b *bufio.Reader) (p []byte, err os.Error) {
+ if p, err = b.ReadLineSlice('\n'); err != nil {
+ return nil, err
+ }
+ if len(p) >= maxLineLength {
+ return nil, LineTooLong
+ }
+
+ // Chop off trailing white space.
+ var i int;
+ for i = len(p); i > 0; i-- {
+ if c := p[i-1]; c != ' ' && c != '\r' && c != '\t' && c != '\n' {
+ break
+ }
+ }
+ return p[0:i], nil
+}
+
+// readLineBytes, but convert the bytes into a string.
+func readLine(b *bufio.Reader) (s string, err os.Error) {
+ p, e := readLineBytes(b);
+ if e != nil {
+ return "", e
+ }
+ return string(p), nil
+}
+
+// Read a key/value pair from b.
+// A key/value has the form Key: Value\r\n
+// and the Value can continue on multiple lines if each continuation line
+// starts with a space.
+func readKeyValue(b *bufio.Reader) (key, value string, err os.Error) {
+ line, e := readLineBytes(b);
+ if e != nil {
+ return "", "", e
+ }
+ if len(line) == 0 {
+ return "", "", nil
+ }
+
+ // Scan first line for colon.
+ for i := 0; i < len(line); i++ {
+ switch line[i] {
+ case ' ':
+ // Key field has space - no good.
+ return "", "", BadHeader;
+ case ':':
+ key = string(line[0:i]);
+ // Skip initial space before value.
+ for i++; i < len(line); i++ {
+ if line[i] != ' ' {
+ break
+ }
+ }
+ value = string(line[i:len(line)]);
+
+ // Look for extension lines, which must begin with space.
+ for {
+ var c byte;
+
+ if c, e = b.ReadByte(); e != nil {
+ return "", "", e
+ }
+ if c != ' ' {
+ // Not leading space; stop.
+ b.UnreadByte();
+ break
+ }
+
+ // Eat leading space.
+ for c == ' ' {
+ if c, e = b.ReadByte(); e != nil {
+ return "", "", e
+ }
+ }
+ b.UnreadByte();
+
+ // Read the rest of the line and add to value.
+ if line, e = readLineBytes(b); e != nil {
+ return "", "", e
+ }
+ value += " " + string(line);
+
+ if len(value) >= maxValueLength {
+ return "", "", ValueTooLong
+ }
+ }
+ return key, value, nil
+ }
+ }
+
+ // Line ended before space or colon.
+ return "", "", BadHeader;
+}
+
+// atoi converts the run of decimal digits starting at s[i] to an integer.
+// It returns the value, the position in s just past the last digit,
+// and whether a valid number was found. The result is (0, 0, false)
+// if there is no digit at s[i] or the value grows past 1000000.
+func atoi(s string, i int) (n, i1 int, ok bool) {
+	const Big = 1000000;
+	pos := i;
+	for pos < len(s) {
+		c := s[pos];
+		if c < '0' || c > '9' {
+			break
+		}
+		n = n*10 + int(c-'0');
+		if n > Big {
+			return 0, 0, false
+		}
+		pos++
+	}
+	if pos == i {
+		// Nothing was consumed: s[i] is missing or is not a digit.
+		return 0, 0, false
+	}
+	return n, pos, true
+}
+
+// parseHTTPVersion parses an HTTP version string of the form
+// "HTTP/major.minor": "HTTP/1.2" -> (1, 2, true).
+// It returns (0, 0, false) if vers does not start with "HTTP/",
+// if either number is missing or rejected by atoi, or if anything
+// follows the minor number.
+func parseHTTPVersion(vers string) (int, int, bool) {
+	// Check the length first: slicing vers[0:5] on a shorter string
+	// is out of range and would crash instead of rejecting the request.
+	if len(vers) < 5 || vers[0:5] != "HTTP/" {
+		return 0, 0, false
+	}
+	major, i, ok := atoi(vers, 5);
+	if !ok || i >= len(vers) || vers[i] != '.' {
+		return 0, 0, false
+	}
+	var minor int;
+	minor, i, ok = atoi(vers, i+1);
+	if !ok || i != len(vers) {
+		// The minor number must run to the end of the string.
+		return 0, 0, false
+	}
+	return major, minor, true
+}
+
+// cmap was the memo table for CanonicalHeaderKey. The function no longer
+// reads or writes it; the declaration is kept only in case other code in
+// the package still refers to the name.
+var cmap = make(map[string]string)
+
+// CanonicalHeaderKey returns the canonical format of the
+// HTTP header key s. The canonicalization converts the first
+// letter and any letter following a hyphen to upper case;
+// the rest are converted to lowercase. For example, the
+// canonical key for "accept-encoding" is "Accept-Encoding".
+//
+// The result is computed on every call rather than memoized in a
+// package-level map: header keys come from the network, so a memo
+// table keyed by them could grow without bound, and it was read and
+// written without any synchronization.
+func CanonicalHeaderKey(s string) string {
+	// canonicalize: first letter upper case
+	// and upper case after each dash.
+	// (Host, User-Agent, If-Modified-Since).
+	// HTTP headers are ASCII only, so no Unicode issues.
+	a := io.StringBytes(s);
+	upper := true;
+	for i, v := range a {
+		if upper && 'a' <= v && v <= 'z' {
+			a[i] = v + 'A' - 'a';
+		}
+		if !upper && 'A' <= v && v <= 'Z' {
+			a[i] = v + 'a' - 'A';
+		}
+		// Only the byte right after a hyphen is upper-cased.
+		upper = v == '-';
+	}
+	return string(a);
+}
+
+// ReadRequest reads and parses a request from b.
+//
+// It reads the request line ("GET /index.html HTTP/1.0"), then header
+// lines up to the first empty line, and then, if a Content-Length header
+// is present, that many bytes of message body.
+//
+// On failure it returns a nil Request and an error: BadRequest if the
+// request line does not consist of three space-separated fields,
+// BadHTTPVersion if the protocol field does not parse, HeaderTooLong if
+// the header has too many lines, BadContentLength if Content-Length is
+// not an unsigned decimal number, ShortEntityBody if the body cannot be
+// read in full, or the error reported by readLine, readKeyValue or
+// ParseURL.
+func ReadRequest(b *bufio.Reader) (req *Request, err os.Error) {
+	req = new(Request);
+
+	// First line: GET /index.html HTTP/1.0
+	var s string;
+	if s, err = readLine(b); err != nil {
+		return nil, err
+	}
+
+	var f []string;
+	if f = strings.Split(s, " "); len(f) != 3 {
+		return nil, BadRequest
+	}
+	req.Method, req.RawUrl, req.Proto = f[0], f[1], f[2];
+	var ok bool;
+	if req.ProtoMajor, req.ProtoMinor, ok = parseHTTPVersion(req.Proto); !ok {
+		return nil, BadHTTPVersion
+	}
+
+	if req.Url, err = ParseURL(req.RawUrl); err != nil {
+		return nil, err
+	}
+
+	// Subsequent lines: Key: value.
+	nheader := 0;
+	req.Header = make(map[string] string);
+	for {
+		var key, value string;
+		if key, value, err = readKeyValue(b); err != nil {
+			return nil, err
+		}
+		if key == "" {
+			// Empty line: end of header.
+			break
+		}
+		if nheader++; nheader >= maxHeaderLines {
+			return nil, HeaderTooLong
+		}
+
+		key = CanonicalHeaderKey(key);
+
+		// RFC 2616 says that if you send the same header key
+		// multiple times, it has to be semantically equivalent
+		// to concatenating the values separated by commas.
+		oldvalue, present := req.Header[key];
+		if present {
+			req.Header[key] = oldvalue+","+value
+		} else {
+			req.Header[key] = value
+		}
+	}
+
+	// RFC2616: Must treat
+	//	GET /index.html HTTP/1.1
+	//	Host: www.google.com
+	// and
+	//	GET http://www.google.com/index.html HTTP/1.1
+	//	Host: doesntmatter
+	// the same. In the second case, any Host line is ignored.
+	//
+	// So the host named in the URL wins, and the Host header is
+	// consulted only when the URL has none. Either way req.Host ends
+	// up holding the host the request is for.
+	req.Host = req.Url.Host;
+	if req.Host == "" {
+		if v, present := req.Header["Host"]; present {
+			req.Host = v
+		}
+	}
+
+	// RFC2616: Should treat
+	//	Pragma: no-cache
+	// like
+	//	Cache-Control: no-cache
+	if v, present := req.Header["Pragma"]; present && v == "no-cache" {
+		if cc, presentcc := req.Header["Cache-Control"]; !presentcc {
+			req.Header["Cache-Control"] = "no-cache"
+		}
+	}
+
+	// Determine whether to hang up after sending the reply.
+	if req.ProtoMajor < 1 || (req.ProtoMajor == 1 && req.ProtoMinor < 1) {
+		req.Close = true
+	} else if v, present := req.Header["Connection"]; present {
+		// TODO: Should split on commas, toss surrounding white space,
+		// and check each field.
+		if v == "close" {
+			req.Close = true
+		}
+	}
+
+	// Pull out useful fields as a convenience to clients.
+	if v, present := req.Header["Referer"]; present {
+		req.Referer = v
+	}
+	if v, present := req.Header["User-Agent"]; present {
+		req.UserAgent = v
+	}
+
+	// TODO: Parse specific header values:
+	//	Accept
+	//	Accept-Encoding
+	//	Accept-Language
+	//	Authorization
+	//	Cache-Control
+	//	Connection
+	//	Date
+	//	Expect
+	//	From
+	//	If-Match
+	//	If-Modified-Since
+	//	If-None-Match
+	//	If-Range
+	//	If-Unmodified-Since
+	//	Max-Forwards
+	//	Proxy-Authorization
+	//	Referer [sic]
+	//	TE (transfer-codings)
+	//	Trailer
+	//	Transfer-Encoding
+	//	Upgrade
+	//	User-Agent
+	//	Via
+	//	Warning
+
+	// A message body exists when either Content-Length or Transfer-Encoding
+	// headers are present. TODO: Handle Transfer-Encoding.
+	if v, present := req.Header["Content-Length"]; present {
+		length, err := strconv.Btoui64(v, 10);
+		if err != nil {
+			return nil, BadContentLength
+		}
+		// TODO: limit the Content-Length. This is an easy DoS vector.
+		raw := make([]byte, length);
+
+		// A single Read may return fewer bytes than asked for even
+		// though more are still on the way, so keep reading until the
+		// body is complete. A read that fails or makes no progress
+		// means the body is shorter than announced.
+		for nread := 0; nread < len(raw); {
+			n, e := b.Read(raw[nread:len(raw)]);
+			if e != nil || n <= 0 {
+				return nil, ShortEntityBody
+			}
+			nread += n
+		}
+		req.Body = io.NewByteReader(raw);
+	}
+
+	return req, nil
+}