diff options
author | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 |
---|---|---|
committer | Ondřej Surý <ondrej@sury.org> | 2011-09-13 13:13:40 +0200 |
commit | 5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch) | |
tree | c0650497e988f47be9c6f2324fa692a52dea82e1 /src/pkg/url | |
parent | 80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff) | |
download | golang-upstream/60.tar.gz |
Imported Upstream version 60upstream/60
Diffstat (limited to 'src/pkg/url')
-rw-r--r-- | src/pkg/url/Makefile | 11 | ||||
-rw-r--r-- | src/pkg/url/url.go | 677 | ||||
-rw-r--r-- | src/pkg/url/url_test.go | 698 |
3 files changed, 1386 insertions, 0 deletions
diff --git a/src/pkg/url/Makefile b/src/pkg/url/Makefile new file mode 100644 index 000000000..b9267bd08 --- /dev/null +++ b/src/pkg/url/Makefile @@ -0,0 +1,11 @@ +# Copyright 2009 The Go Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include ../../Make.inc + +TARG=url +GOFILES=\ + url.go\ + +include ../../Make.pkg diff --git a/src/pkg/url/url.go b/src/pkg/url/url.go new file mode 100644 index 000000000..d07b01611 --- /dev/null +++ b/src/pkg/url/url.go @@ -0,0 +1,677 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package URL parses URLs and implements query escaping. +// See RFC 3986. +package url + +import ( + "os" + "strconv" + "strings" +) + +// Error reports an error and the operation and URL that caused it. +type Error struct { + Op string + URL string + Error os.Error +} + +func (e *Error) String() string { return e.Op + " " + e.URL + ": " + e.Error.String() } + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} + +func unhex(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 0 +} + +type encoding int + +const ( + encodePath encoding = 1 + iota + encodeUserPassword + encodeQueryComponent + encodeFragment + encodeOpaque +) + +type EscapeError string + +func (e EscapeError) String() string { + return "invalid URL escape " + strconv.Quote(string(e)) +} + +// Return true if the specified character should be escaped when +// appearing in a URL string, according to RFC 2396. +// When 'all' is true the full range of reserved characters are matched. +func shouldEscape(c byte, mode encoding) bool { + // RFC 2396 §2.3 Unreserved characters (alphanum) + if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { + return false + } + switch c { + case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark) + return false + + case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) + // Different sections of the URL allow a few of + // the reserved characters to appear unescaped. + switch mode { + case encodePath: // §3.3 + // The RFC allows : @ & = + $ , but saves / ; for assigning + // meaning to individual path segments. This package + // only manipulates the path as a whole, so we allow those + // last two as well. Clients that need to distinguish between + // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath. + // That leaves only ? to escape. + return c == '?' + + case encodeUserPassword: // §3.2.2 + // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /. + // The parsing of userinfo treats : as special so we must escape that too. + return c == '@' || c == '/' || c == ':' + + case encodeQueryComponent: // §3.4 + // The RFC reserves (so we must escape) everything. + return true + + case encodeFragment: // §4.1 + // The RFC text is silent but the grammar allows + // everything, so escape nothing. + return false + + case encodeOpaque: // §3 opaque_part + // The RFC allows opaque_part to use all characters + // except that the leading / must be escaped. + // (We implement that case in String.) + return false + } + } + + // Everything else must be escaped. + return true +} + +// QueryUnescape does the inverse transformation of QueryEscape, converting +// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if +// any % is not followed by two hexadecimal digits. +func QueryUnescape(s string) (string, os.Error) { + return unescape(s, encodeQueryComponent) +} + +// unescape unescapes a string; the mode specifies +// which section of the URL string is being unescaped. +func unescape(s string, mode encoding) (string, os.Error) { + // Count %, check that they're well-formed. + n := 0 + hasPlus := false + for i := 0; i < len(s); { + switch s[i] { + case '%': + n++ + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + s = s[i:] + if len(s) > 3 { + s = s[0:3] + } + return "", EscapeError(s) + } + i += 3 + case '+': + hasPlus = mode == encodeQueryComponent + i++ + default: + i++ + } + } + + if n == 0 && !hasPlus { + return s, nil + } + + t := make([]byte, len(s)-2*n) + j := 0 + for i := 0; i < len(s); { + switch s[i] { + case '%': + t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) + j++ + i += 3 + case '+': + if mode == encodeQueryComponent { + t[j] = ' ' + } else { + t[j] = '+' + } + j++ + i++ + default: + t[j] = s[i] + j++ + i++ + } + } + return string(t), nil +} + +// QueryEscape escapes the string so it can be safely placed +// inside a URL query. +func QueryEscape(s string) string { + return escape(s, encodeQueryComponent) +} + +func escape(s string, mode encoding) string { + spaceCount, hexCount := 0, 0 + for i := 0; i < len(s); i++ { + c := s[i] + if shouldEscape(c, mode) { + if c == ' ' && mode == encodeQueryComponent { + spaceCount++ + } else { + hexCount++ + } + } + } + + if spaceCount == 0 && hexCount == 0 { + return s + } + + t := make([]byte, len(s)+2*hexCount) + j := 0 + for i := 0; i < len(s); i++ { + switch c := s[i]; { + case c == ' ' && mode == encodeQueryComponent: + t[j] = '+' + j++ + case shouldEscape(c, mode): + t[j] = '%' + t[j+1] = "0123456789ABCDEF"[c>>4] + t[j+2] = "0123456789ABCDEF"[c&15] + j += 3 + default: + t[j] = s[i] + j++ + } + } + return string(t) +} + +// UnescapeUserinfo parses the RawUserinfo field of a URL +// as the form user or user:password and unescapes and returns +// the two halves. +// +// This functionality should only be used with legacy web sites. +// RFC 2396 warns that interpreting Userinfo this way +// ``is NOT RECOMMENDED, because the passing of authentication +// information in clear text (such as URI) has proven to be a +// security risk in almost every case where it has been used.'' +func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) { + u, p := split(rawUserinfo, ':', true) + if user, err = unescape(u, encodeUserPassword); err != nil { + return "", "", err + } + if password, err = unescape(p, encodeUserPassword); err != nil { + return "", "", err + } + return +} + +// EscapeUserinfo combines user and password in the form +// user:password (or just user if password is empty) and then +// escapes it for use as the URL.RawUserinfo field. +// +// This functionality should only be used with legacy web sites. +// RFC 2396 warns that interpreting Userinfo this way +// ``is NOT RECOMMENDED, because the passing of authentication +// information in clear text (such as URI) has proven to be a +// security risk in almost every case where it has been used.'' +func EscapeUserinfo(user, password string) string { + raw := escape(user, encodeUserPassword) + if password != "" { + raw += ":" + escape(password, encodeUserPassword) + } + return raw +} + +// A URL represents a parsed URL (technically, a URI reference). +// The general form represented is: +// scheme://[userinfo@]host/path[?query][#fragment] +// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format" +// (special characters must be hex-escaped if not meant to have special meaning). +// All other fields are logical values; '+' or '%' represent themselves. +// +// The various Raw values are supplied in wire format because +// clients typically have to split them into pieces before further +// decoding. +type URL struct { + Raw string // the original string + Scheme string // scheme + RawAuthority string // [userinfo@]host + RawUserinfo string // userinfo + Host string // host + RawPath string // /path[?query][#fragment] + Path string // /path + OpaquePath bool // path is opaque (unrooted when scheme is present) + RawQuery string // query + Fragment string // fragment +} + +// Maybe rawurl is of the form scheme:path. +// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) +// If so, return scheme, path; else return "", rawurl. +func getscheme(rawurl string) (scheme, path string, err os.Error) { + for i := 0; i < len(rawurl); i++ { + c := rawurl[i] + switch { + case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': + // do nothing + case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': + if i == 0 { + return "", rawurl, nil + } + case c == ':': + if i == 0 { + return "", "", os.NewError("missing protocol scheme") + } + return rawurl[0:i], rawurl[i+1:], nil + default: + // we have encountered an invalid character, + // so there is no valid scheme + return "", rawurl, nil + } + } + return "", rawurl, nil +} + +// Maybe s is of the form t c u. +// If so, return t, c u (or t, u if cutc == true). +// If not, return s, "". +func split(s string, c byte, cutc bool) (string, string) { + for i := 0; i < len(s); i++ { + if s[i] == c { + if cutc { + return s[0:i], s[i+1:] + } + return s[0:i], s[i:] + } + } + return s, "" +} + +// Parse parses rawurl into a URL structure. +// The string rawurl is assumed not to have a #fragment suffix. +// (Web browsers strip #fragment before sending the URL to a web server.) +// The rawurl may be relative or absolute. +func Parse(rawurl string) (url *URL, err os.Error) { + return parse(rawurl, false) +} + +// ParseRequest parses rawurl into a URL structure. It assumes that +// rawurl was received from an HTTP request, so the rawurl is interpreted +// only as an absolute URI or an absolute path. +// The string rawurl is assumed not to have a #fragment suffix. +// (Web browsers strip #fragment before sending the URL to a web server.) +func ParseRequest(rawurl string) (url *URL, err os.Error) { + return parse(rawurl, true) +} + +// parse parses a URL from a string in one of two contexts. If +// viaRequest is true, the URL is assumed to have arrived via an HTTP request, +// in which case only absolute URLs or path-absolute relative URLs are allowed. +// If viaRequest is false, all forms of relative URLs are allowed. +func parse(rawurl string, viaRequest bool) (url *URL, err os.Error) { + var ( + leadingSlash bool + path string + ) + + if rawurl == "" { + err = os.NewError("empty url") + goto Error + } + url = new(URL) + url.Raw = rawurl + + // Split off possible leading "http:", "mailto:", etc. + // Cannot contain escaped characters. + if url.Scheme, path, err = getscheme(rawurl); err != nil { + goto Error + } + leadingSlash = strings.HasPrefix(path, "/") + + if url.Scheme != "" && !leadingSlash { + // RFC 2396: + // Absolute URI (has scheme) with non-rooted path + // is uninterpreted. It doesn't even have a ?query. + // This is the case that handles mailto:name@example.com. + url.RawPath = path + + if url.Path, err = unescape(path, encodeOpaque); err != nil { + goto Error + } + url.OpaquePath = true + } else { + if viaRequest && !leadingSlash { + err = os.NewError("invalid URI for request") + goto Error + } + + // Split off query before parsing path further. + url.RawPath = path + path, query := split(path, '?', false) + if len(query) > 1 { + url.RawQuery = query[1:] + } + + // Maybe path is //authority/path + if (url.Scheme != "" || !viaRequest) && + strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") { + url.RawAuthority, path = split(path[2:], '/', false) + url.RawPath = url.RawPath[2+len(url.RawAuthority):] + } + + // Split authority into userinfo@host. + // If there's no @, split's default is wrong. Check explicitly. + var rawHost string + if strings.Index(url.RawAuthority, "@") < 0 { + rawHost = url.RawAuthority + } else { + url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true) + } + + // We leave RawAuthority only in raw form because clients + // of common protocols should be using Userinfo and Host + // instead. Clients that wish to use RawAuthority will have to + // interpret it themselves: RFC 2396 does not define the meaning. + + if strings.Contains(rawHost, "%") { + // Host cannot contain escaped characters. + err = os.NewError("hexadecimal escape in host") + goto Error + } + url.Host = rawHost + + if url.Path, err = unescape(path, encodePath); err != nil { + goto Error + } + } + return url, nil + +Error: + return nil, &Error{"parse", rawurl, err} + +} + +// ParseWithReference is like Parse but allows a trailing #fragment. +func ParseWithReference(rawurlref string) (url *URL, err os.Error) { + // Cut off #frag. + rawurl, frag := split(rawurlref, '#', false) + if url, err = Parse(rawurl); err != nil { + return nil, err + } + url.Raw += frag + url.RawPath += frag + if len(frag) > 1 { + frag = frag[1:] + if url.Fragment, err = unescape(frag, encodeFragment); err != nil { + return nil, &Error{"parse", rawurl, err} + } + } + return url, nil +} + +// String reassembles url into a valid URL string. +// +// There are redundant fields stored in the URL structure: +// the String method consults Scheme, Path, Host, RawUserinfo, +// RawQuery, and Fragment, but not Raw, RawPath or RawAuthority. +func (url *URL) String() string { + result := "" + if url.Scheme != "" { + result += url.Scheme + ":" + } + if url.Host != "" || url.RawUserinfo != "" { + result += "//" + if url.RawUserinfo != "" { + // hide the password, if any + info := url.RawUserinfo + if i := strings.Index(info, ":"); i >= 0 { + info = info[0:i] + ":******" + } + result += info + "@" + } + result += url.Host + } + if url.OpaquePath { + path := url.Path + if strings.HasPrefix(path, "/") { + result += "%2f" + path = path[1:] + } + result += escape(path, encodeOpaque) + } else { + result += escape(url.Path, encodePath) + } + if url.RawQuery != "" { + result += "?" + url.RawQuery + } + if url.Fragment != "" { + result += "#" + escape(url.Fragment, encodeFragment) + } + return result +} + +// Values maps a string key to a list of values. +// It is typically used for query parameters and form values. +// Unlike in the http.Header map, the keys in a Values map +// are case-sensitive. +type Values map[string][]string + +// Get gets the first value associated with the given key. +// If there are no values associated with the key, Get returns +// the empty string. To access multiple values, use the map +// directly. +func (v Values) Get(key string) string { + if v == nil { + return "" + } + vs, ok := v[key] + if !ok || len(vs) == 0 { + return "" + } + return vs[0] +} + +// Set sets the key to value. It replaces any existing +// values. +func (v Values) Set(key, value string) { + v[key] = []string{value} +} + +// Add adds the key to value. It appends to any existing +// values associated with key. +func (v Values) Add(key, value string) { + v[key] = append(v[key], value) +} + +// Del deletes the values associated with key. +func (v Values) Del(key string) { + v[key] = nil, false +} + +// ParseQuery parses the URL-encoded query string and returns +// a map listing the values specified for each key. +// ParseQuery always returns a non-nil map containing all the +// valid query parameters found; err describes the first decoding error +// encountered, if any. +func ParseQuery(query string) (m Values, err os.Error) { + m = make(Values) + err = parseQuery(m, query) + return +} + +func parseQuery(m Values, query string) (err os.Error) { + for _, kv := range strings.Split(query, "&") { + if len(kv) == 0 { + continue + } + kvPair := strings.SplitN(kv, "=", 2) + + var key, value string + var e os.Error + key, e = QueryUnescape(kvPair[0]) + if e == nil && len(kvPair) > 1 { + value, e = QueryUnescape(kvPair[1]) + } + if e != nil { + err = e + continue + } + m[key] = append(m[key], value) + } + return err +} + +// Encode encodes the values into ``URL encoded'' form. +// e.g. "foo=bar&bar=baz" +func (v Values) Encode() string { + if v == nil { + return "" + } + parts := make([]string, 0, len(v)) // will be large enough for most uses + for k, vs := range v { + prefix := QueryEscape(k) + "=" + for _, v := range vs { + parts = append(parts, prefix+QueryEscape(v)) + } + } + return strings.Join(parts, "&") +} + +// resolvePath applies special path segments from refs and applies +// them to base, per RFC 2396. +func resolvePath(basepath string, refpath string) string { + base := strings.Split(basepath, "/") + refs := strings.Split(refpath, "/") + if len(base) == 0 { + base = []string{""} + } + for idx, ref := range refs { + switch { + case ref == ".": + base[len(base)-1] = "" + case ref == "..": + newLen := len(base) - 1 + if newLen < 1 { + newLen = 1 + } + base = base[0:newLen] + base[len(base)-1] = "" + default: + if idx == 0 || base[len(base)-1] == "" { + base[len(base)-1] = ref + } else { + base = append(base, ref) + } + } + } + return strings.Join(base, "/") +} + +// IsAbs returns true if the URL is absolute. +func (url *URL) IsAbs() bool { + return url.Scheme != "" +} + +// Parse parses a URL in the context of a base URL. The URL in ref +// may be relative or absolute. Parse returns nil, err on parse +// failure, otherwise its return value is the same as ResolveReference. +func (base *URL) Parse(ref string) (*URL, os.Error) { + refurl, err := Parse(ref) + if err != nil { + return nil, err + } + return base.ResolveReference(refurl), nil +} + +// ResolveReference resolves a URI reference to an absolute URI from +// an absolute base URI, per RFC 2396 Section 5.2. The URI reference +// may be relative or absolute. ResolveReference always returns a new +// URL instance, even if the returned URL is identical to either the +// base or reference. If ref is an absolute URL, then ResolveReference +// ignores base and returns a copy of ref. +func (base *URL) ResolveReference(ref *URL) *URL { + url := new(URL) + switch { + case ref.IsAbs(): + *url = *ref + default: + // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + *url = *base + if ref.RawAuthority != "" { + // The "net_path" case. + url.RawAuthority = ref.RawAuthority + url.Host = ref.Host + url.RawUserinfo = ref.RawUserinfo + } + switch { + case url.OpaquePath: + url.Path = ref.Path + url.RawPath = ref.RawPath + url.RawQuery = ref.RawQuery + case strings.HasPrefix(ref.Path, "/"): + // The "abs_path" case. + url.Path = ref.Path + url.RawPath = ref.RawPath + url.RawQuery = ref.RawQuery + default: + // The "rel_path" case. + path := resolvePath(base.Path, ref.Path) + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + url.Path = path + url.RawPath = url.Path + url.RawQuery = ref.RawQuery + if ref.RawQuery != "" { + url.RawPath += "?" + url.RawQuery + } + } + + url.Fragment = ref.Fragment + } + url.Raw = url.String() + return url +} + +// Query parses RawQuery and returns the corresponding values. +func (u *URL) Query() Values { + v, _ := ParseQuery(u.RawQuery) + return v +} + +// EncodedPath returns the URL's path in "URL path encoded" form. +func (u *URL) EncodedPath() string { + return escape(u.Path, encodePath) +} diff --git a/src/pkg/url/url_test.go b/src/pkg/url/url_test.go new file mode 100644 index 000000000..af394d4fb --- /dev/null +++ b/src/pkg/url/url_test.go @@ -0,0 +1,698 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package url + +import ( + "fmt" + "os" + "reflect" + "testing" +) + +// TODO(rsc): +// test Unescape +// test Escape +// test Parse + +type URLTest struct { + in string + out *URL + roundtrip string // expected result of reserializing the URL; empty means same as "in". +} + +var urltests = []URLTest{ + // no path + { + "http://www.google.com", + &URL{ + Raw: "http://www.google.com", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + }, + "", + }, + // path + { + "http://www.google.com/", + &URL{ + Raw: "http://www.google.com/", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "", + }, + // path with hex escaping + { + "http://www.google.com/file%20one%26two", + &URL{ + Raw: "http://www.google.com/file%20one%26two", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/file%20one%26two", + Path: "/file one&two", + }, + "http://www.google.com/file%20one&two", + }, + // user + { + "ftp://webmaster@www.google.com/", + &URL{ + Raw: "ftp://webmaster@www.google.com/", + Scheme: "ftp", + RawAuthority: "webmaster@www.google.com", + RawUserinfo: "webmaster", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "", + }, + // escape sequence in username + { + "ftp://john%20doe@www.google.com/", + &URL{ + Raw: "ftp://john%20doe@www.google.com/", + Scheme: "ftp", + RawAuthority: "john%20doe@www.google.com", + RawUserinfo: "john%20doe", + Host: "www.google.com", + RawPath: "/", + Path: "/", + }, + "ftp://john%20doe@www.google.com/", + }, + // query + { + "http://www.google.com/?q=go+language", + &URL{ + Raw: "http://www.google.com/?q=go+language", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language", + Path: "/", + RawQuery: "q=go+language", + }, + "", + }, + // query with hex escaping: NOT parsed + { + "http://www.google.com/?q=go%20language", + &URL{ + Raw: "http://www.google.com/?q=go%20language", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go%20language", + Path: "/", + RawQuery: "q=go%20language", + }, + "", + }, + // %20 outside query + { + "http://www.google.com/a%20b?q=c+d", + &URL{ + Raw: "http://www.google.com/a%20b?q=c+d", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/a%20b?q=c+d", + Path: "/a b", + RawQuery: "q=c+d", + }, + "", + }, + // path without leading /, so no query parsing + { + "http:www.google.com/?q=go+language", + &URL{ + Raw: "http:www.google.com/?q=go+language", + Scheme: "http", + RawPath: "www.google.com/?q=go+language", + Path: "www.google.com/?q=go+language", + OpaquePath: true, + }, + "http:www.google.com/?q=go+language", + }, + // path without leading /, so no query parsing + { + "http:%2f%2fwww.google.com/?q=go+language", + &URL{ + Raw: "http:%2f%2fwww.google.com/?q=go+language", + Scheme: "http", + RawPath: "%2f%2fwww.google.com/?q=go+language", + Path: "//www.google.com/?q=go+language", + OpaquePath: true, + }, + "http:%2f/www.google.com/?q=go+language", + }, + // non-authority + { + "mailto:/webmaster@golang.org", + &URL{ + Raw: "mailto:/webmaster@golang.org", + Scheme: "mailto", + RawPath: "/webmaster@golang.org", + Path: "/webmaster@golang.org", + }, + "", + }, + // non-authority + { + "mailto:webmaster@golang.org", + &URL{ + Raw: "mailto:webmaster@golang.org", + Scheme: "mailto", + RawPath: "webmaster@golang.org", + Path: "webmaster@golang.org", + OpaquePath: true, + }, + "", + }, + // unescaped :// in query should not create a scheme + { + "/foo?query=http://bad", + &URL{ + Raw: "/foo?query=http://bad", + RawPath: "/foo?query=http://bad", + Path: "/foo", + RawQuery: "query=http://bad", + }, + "", + }, + // leading // without scheme should create an authority + { + "//foo", + &URL{ + RawAuthority: "foo", + Raw: "//foo", + Host: "foo", + Scheme: "", + RawPath: "", + Path: "", + }, + "", + }, + // leading // without scheme, with userinfo, path, and query + { + "//user@foo/path?a=b", + &URL{ + Raw: "//user@foo/path?a=b", + RawAuthority: "user@foo", + RawUserinfo: "user", + Scheme: "", + RawPath: "/path?a=b", + Path: "/path", + RawQuery: "a=b", + Host: "foo", + }, + "", + }, + // Three leading slashes isn't an authority, but doesn't return an error. + // (We can't return an error, as this code is also used via + // ServeHTTP -> ReadRequest -> Parse, which is arguably a + // different URL parsing context, but currently shares the + // same codepath) + { + "///threeslashes", + &URL{ + RawAuthority: "", + Raw: "///threeslashes", + Host: "", + Scheme: "", + RawPath: "///threeslashes", + Path: "///threeslashes", + }, + "", + }, + { + "http://user:password@google.com", + &URL{ + Raw: "http://user:password@google.com", + Scheme: "http", + RawAuthority: "user:password@google.com", + RawUserinfo: "user:password", + Host: "google.com", + }, + "http://user:******@google.com", + }, + { + "http://user:longerpass@google.com", + &URL{ + Raw: "http://user:longerpass@google.com", + Scheme: "http", + RawAuthority: "user:longerpass@google.com", + RawUserinfo: "user:longerpass", + Host: "google.com", + }, + "http://user:******@google.com", + }, +} + +var urlnofragtests = []URLTest{ + { + "http://www.google.com/?q=go+language#foo", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo", + Path: "/", + RawQuery: "q=go+language#foo", + }, + "", + }, +} + +var urlfragtests = []URLTest{ + { + "http://www.google.com/?q=go+language#foo", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo", + }, + "", + }, + { + "http://www.google.com/?q=go+language#foo%26bar", + &URL{ + Raw: "http://www.google.com/?q=go+language#foo%26bar", + Scheme: "http", + RawAuthority: "www.google.com", + Host: "www.google.com", + RawPath: "/?q=go+language#foo%26bar", + Path: "/", + RawQuery: "q=go+language", + Fragment: "foo&bar", + }, + "http://www.google.com/?q=go+language#foo&bar", + }, +} + +// more useful string for debugging than fmt's struct printer +func ufmt(u *URL) string { + return fmt.Sprintf("raw=%q, scheme=%q, rawpath=%q, auth=%q, userinfo=%q, host=%q, path=%q, rawq=%q, frag=%q", + u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo, + u.Host, u.Path, u.RawQuery, u.Fragment) +} + +func DoTest(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) { + for _, tt := range tests { + u, err := parse(tt.in) + if err != nil { + t.Errorf("%s(%q) returned error %s", name, tt.in, err) + continue + } + if !reflect.DeepEqual(u, tt.out) { + t.Errorf("%s(%q):\n\thave %v\n\twant %v\n", + name, tt.in, ufmt(u), ufmt(tt.out)) + } + } +} + +func TestParse(t *testing.T) { + DoTest(t, Parse, "Parse", urltests) + DoTest(t, Parse, "Parse", urlnofragtests) +} + +func TestParseWithReference(t *testing.T) { + DoTest(t, ParseWithReference, "ParseWithReference", urltests) + DoTest(t, ParseWithReference, "ParseWithReference", urlfragtests) +} + +const pathThatLooksSchemeRelative = "//not.a.user@not.a.host/just/a/path" + +var parseRequestUrlTests = []struct { + url string + expectedValid bool +}{ + {"http://foo.com", true}, + {"http://foo.com/", true}, + {"http://foo.com/path", true}, + {"/", true}, + {pathThatLooksSchemeRelative, true}, + {"//not.a.user@%66%6f%6f.com/just/a/path/also", true}, + {"foo.html", false}, + {"../dir/", false}, +} + +func TestParseRequest(t *testing.T) { + for _, test := range parseRequestUrlTests { + _, err := ParseRequest(test.url) + valid := err == nil + if valid != test.expectedValid { + t.Errorf("Expected valid=%v for %q; got %v", test.expectedValid, test.url, valid) + } + } + + url, err := ParseRequest(pathThatLooksSchemeRelative) + if err != nil { + t.Fatalf("Unexpected error %v", err) + } + if url.Path != pathThatLooksSchemeRelative { + t.Errorf("Expected path %q; got %q", pathThatLooksSchemeRelative, url.Path) + } +} + +func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) { + for _, tt := range tests { + u, err := parse(tt.in) + if err != nil { + t.Errorf("%s(%q) returned error %s", name, tt.in, err) + continue + } + s := u.String() + expected := tt.in + if len(tt.roundtrip) > 0 { + expected = tt.roundtrip + } + if s != expected { + t.Errorf("%s(%q).String() == %q (expected %q)", name, tt.in, s, expected) + } + } +} + +func TestURLString(t *testing.T) { + DoTestString(t, Parse, "Parse", urltests) + DoTestString(t, Parse, "Parse", urlnofragtests) + DoTestString(t, ParseWithReference, "ParseWithReference", urltests) + DoTestString(t, ParseWithReference, "ParseWithReference", urlfragtests) +} + +type EscapeTest struct { + in string + out string + err os.Error +} + +var unescapeTests = []EscapeTest{ + { + "", + "", + nil, + }, + { + "abc", + "abc", + nil, + }, + { + "1%41", + "1A", + nil, + }, + { + "1%41%42%43", + "1ABC", + nil, + }, + { + "%4a", + "J", + nil, + }, + { + "%6F", + "o", + nil, + }, + { + "%", // not enough characters after % + "", + EscapeError("%"), + }, + { + "%a", // not enough characters after % + "", + EscapeError("%a"), + }, + { + "%1", // not enough characters after % + "", + EscapeError("%1"), + }, + { + "123%45%6", // not enough characters after % + "", + EscapeError("%6"), + }, + { + "%zzzzz", // invalid hex digits + "", + EscapeError("%zz"), + }, +} + +func TestUnescape(t *testing.T) { + for _, tt := range unescapeTests { + actual, err := QueryUnescape(tt.in) + if actual != tt.out || (err != nil) != (tt.err != nil) { + t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", tt.in, actual, err, tt.out, tt.err) + } + } +} + +var escapeTests = []EscapeTest{ + { + "", + "", + nil, + }, + { + "abc", + "abc", + nil, + }, + { + "one two", + "one+two", + nil, + }, + { + "10%", + "10%25", + nil, + }, + { + " ?&=#+%!<>#\"{}|\\^[]`☺\t", + "+%3F%26%3D%23%2B%25!%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09", + nil, + }, +} + +func TestEscape(t *testing.T) { + for _, tt := range escapeTests { + actual := QueryEscape(tt.in) + if tt.out != actual { + t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out) + } + + // for bonus points, verify that escape:unescape is an identity. + roundtrip, err := QueryUnescape(actual) + if roundtrip != tt.in || err != nil { + t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]") + } + } +} + +type UserinfoTest struct { + User string + Password string + Raw string +} + +var userinfoTests = []UserinfoTest{ + {"user", "password", "user:password"}, + {"foo:bar", "~!@#$%^&*()_+{}|[]\\-=`:;'\"<>?,./", + "foo%3Abar:~!%40%23$%25%5E&*()_+%7B%7D%7C%5B%5D%5C-=%60%3A;'%22%3C%3E?,.%2F"}, +} + +func TestEscapeUserinfo(t *testing.T) { + for _, tt := range userinfoTests { + if raw := EscapeUserinfo(tt.User, tt.Password); raw != tt.Raw { + t.Errorf("EscapeUserinfo(%q, %q) = %q, want %q", tt.User, tt.Password, raw, tt.Raw) + } + } +} + +func TestUnescapeUserinfo(t *testing.T) { + for _, tt := range userinfoTests { + if user, pass, err := UnescapeUserinfo(tt.Raw); user != tt.User || pass != tt.Password || err != nil { + t.Errorf("UnescapeUserinfo(%q) = %q, %q, %v, want %q, %q, nil", tt.Raw, user, pass, err, tt.User, tt.Password) + } + } +} + +type EncodeQueryTest struct { + m Values + expected string + expected1 string +} + +var encodeQueryTests = []EncodeQueryTest{ + {nil, "", ""}, + {Values{"q": {"puppies"}, "oe": {"utf8"}}, "q=puppies&oe=utf8", "oe=utf8&q=puppies"}, + {Values{"q": {"dogs", "&", "7"}}, "q=dogs&q=%26&q=7", "q=dogs&q=%26&q=7"}, +} + +func TestEncodeQuery(t *testing.T) { + for _, tt := range encodeQueryTests { + if q := tt.m.Encode(); q != tt.expected && q != tt.expected1 { + t.Errorf(`EncodeQuery(%+v) = %q, want %q`, tt.m, q, tt.expected) + } + } +} + +var resolvePathTests = []struct { + base, ref, expected string +}{ + {"a/b", ".", "a/"}, + {"a/b", "c", "a/c"}, + {"a/b", "..", ""}, + {"a/", "..", ""}, + {"a/", "../..", ""}, + {"a/b/c", "..", "a/"}, + {"a/b/c", "../d", "a/d"}, + {"a/b/c", ".././d", "a/d"}, + {"a/b", "./..", ""}, + {"a/./b", ".", "a/./"}, + {"a/../", ".", "a/../"}, + {"a/.././b", "c", "a/.././c"}, +} + +func TestResolvePath(t *testing.T) { + for _, test := range resolvePathTests { + got := resolvePath(test.base, test.ref) + if got != test.expected { + t.Errorf("For %q + %q got %q; expected %q", test.base, test.ref, got, test.expected) + } + } +} + +var resolveReferenceTests = []struct { + base, rel, expected string +}{ + // Absolute URL references + {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"}, + {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"}, + {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"}, + + // Path-absolute references + {"http://foo.com/bar", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"}, + {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"}, + + // Scheme-relative + {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"}, + + // Path-relative references: + + // ... current directory + {"http://foo.com", ".", "http://foo.com/"}, + {"http://foo.com/bar", ".", "http://foo.com/"}, + {"http://foo.com/bar/", ".", "http://foo.com/bar/"}, + + // ... going down + {"http://foo.com", "bar", "http://foo.com/bar"}, + {"http://foo.com/", "bar", "http://foo.com/bar"}, + {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"}, + + // ... going up + {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"}, + {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"}, + {"http://foo.com/bar", "..", "http://foo.com/"}, + {"http://foo.com/bar/baz", "./..", "http://foo.com/"}, + + // "." and ".." in the base aren't special + {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/./dotdot/../baz"}, + + // Triple dot isn't special + {"http://foo.com/bar", "...", "http://foo.com/..."}, + + // Fragment + {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"}, +} + +func TestResolveReference(t *testing.T) { + mustParse := func(url string) *URL { + u, err := ParseWithReference(url) + if err != nil { + t.Fatalf("Expected URL to parse: %q, got error: %v", url, err) + } + return u + } + for _, test := range resolveReferenceTests { + base := mustParse(test.base) + rel := mustParse(test.rel) + url := base.ResolveReference(rel) + urlStr := url.String() + if urlStr != test.expected { + t.Errorf("Resolving %q + %q != %q; got %q", test.base, test.rel, test.expected, urlStr) + } + } + + // Test that new instances are returned. + base := mustParse("http://foo.com/") + abs := base.ResolveReference(mustParse(".")) + if base == abs { + t.Errorf("Expected no-op reference to return new URL instance.") + } + barRef := mustParse("http://bar.com/") + abs = base.ResolveReference(barRef) + if abs == barRef { + t.Errorf("Expected resolution of absolute reference to return new URL instance.") + } + + // Test the convenience wrapper too + base = mustParse("http://foo.com/path/one/") + abs, _ = base.Parse("../two") + expected := "http://foo.com/path/two" + if abs.String() != expected { + t.Errorf("Parse wrapper got %q; expected %q", abs.String(), expected) + } + _, err := base.Parse("") + if err == nil { + t.Errorf("Expected an error from Parse wrapper parsing an empty string.") + } + +} + +func TestQueryValues(t *testing.T) { + u, _ := Parse("http://x.com?foo=bar&bar=1&bar=2") + v := u.Query() + if len(v) != 2 { + t.Errorf("got %d keys in Query values, want 2", len(v)) + } + if g, e := v.Get("foo"), "bar"; g != e { + t.Errorf("Get(foo) = %q, want %q", g, e) + } + // Case sensitive: + if g, e := v.Get("Foo"), ""; g != e { + t.Errorf("Get(Foo) = %q, want %q", g, e) + } + if g, e := v.Get("bar"), "1"; g != e { + t.Errorf("Get(bar) = %q, want %q", g, e) + } + if g, e := v.Get("baz"), ""; g != e { + t.Errorf("Get(baz) = %q, want %q", g, e) + } + v.Del("bar") + if g, e := v.Get("bar"), ""; g != e { + t.Errorf("second Get(bar) = %q, want %q", g, e) + } +} |