summaryrefslogtreecommitdiff
path: root/src/pkg/url
diff options
context:
space:
mode:
authorOndřej Surý <ondrej@sury.org>2011-09-13 13:13:40 +0200
committerOndřej Surý <ondrej@sury.org>2011-09-13 13:13:40 +0200
commit5ff4c17907d5b19510a62e08fd8d3b11e62b431d (patch)
treec0650497e988f47be9c6f2324fa692a52dea82e1 /src/pkg/url
parent80f18fc933cf3f3e829c5455a1023d69f7b86e52 (diff)
downloadgolang-upstream/60.tar.gz
Imported Upstream version 60upstream/60
Diffstat (limited to 'src/pkg/url')
-rw-r--r--src/pkg/url/Makefile11
-rw-r--r--src/pkg/url/url.go677
-rw-r--r--src/pkg/url/url_test.go698
3 files changed, 1386 insertions, 0 deletions
diff --git a/src/pkg/url/Makefile b/src/pkg/url/Makefile
new file mode 100644
index 000000000..b9267bd08
--- /dev/null
+++ b/src/pkg/url/Makefile
@@ -0,0 +1,11 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../Make.inc
+
+TARG=url
+GOFILES=\
+ url.go\
+
+include ../../Make.pkg
diff --git a/src/pkg/url/url.go b/src/pkg/url/url.go
new file mode 100644
index 000000000..d07b01611
--- /dev/null
+++ b/src/pkg/url/url.go
@@ -0,0 +1,677 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package URL parses URLs and implements query escaping.
+// See RFC 3986.
+package url
+
+import (
+ "os"
+ "strconv"
+ "strings"
+)
+
+// Error reports an error and the operation and URL that caused it.
+type Error struct {
+ Op string
+ URL string
+ Error os.Error
+}
+
+func (e *Error) String() string { return e.Op + " " + e.URL + ": " + e.Error.String() }
+
+func ishex(c byte) bool {
+ switch {
+ case '0' <= c && c <= '9':
+ return true
+ case 'a' <= c && c <= 'f':
+ return true
+ case 'A' <= c && c <= 'F':
+ return true
+ }
+ return false
+}
+
+func unhex(c byte) byte {
+ switch {
+ case '0' <= c && c <= '9':
+ return c - '0'
+ case 'a' <= c && c <= 'f':
+ return c - 'a' + 10
+ case 'A' <= c && c <= 'F':
+ return c - 'A' + 10
+ }
+ return 0
+}
+
+type encoding int
+
+const (
+ encodePath encoding = 1 + iota
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+ encodeOpaque
+)
+
+type EscapeError string
+
+func (e EscapeError) String() string {
+ return "invalid URL escape " + strconv.Quote(string(e))
+}
+
+// Return true if the specified character should be escaped when
+// appearing in a URL string, according to RFC 2396.
+// When 'all' is true the full range of reserved characters are matched.
+func shouldEscape(c byte, mode encoding) bool {
+ // RFC 2396 §2.3 Unreserved characters (alphanum)
+ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
+ return false
+ }
+ switch c {
+ case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark)
+ return false
+
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows : @ & = + $ , but saves / ; for assigning
+ // meaning to individual path segments. This package
+ // only manipulates the path as a whole, so we allow those
+ // last two as well. Clients that need to distinguish between
+ // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath.
+ // That leaves only ? to escape.
+ return c == '?'
+
+ case encodeUserPassword: // §3.2.2
+ // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
+ // The parsing of userinfo treats : as special so we must escape that too.
+ return c == '@' || c == '/' || c == ':'
+
+ case encodeQueryComponent: // §3.4
+ // The RFC reserves (so we must escape) everything.
+ return true
+
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing.
+ return false
+
+ case encodeOpaque: // §3 opaque_part
+ // The RFC allows opaque_part to use all characters
+ // except that the leading / must be escaped.
+ // (We implement that case in String.)
+ return false
+ }
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
+// QueryUnescape does the inverse transformation of QueryEscape, converting
+// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
+// any % is not followed by two hexadecimal digits.
+func QueryUnescape(s string) (string, os.Error) {
+ return unescape(s, encodeQueryComponent)
+}
+
+// unescape unescapes a string; the mode specifies
+// which section of the URL string is being unescaped.
+func unescape(s string, mode encoding) (string, os.Error) {
+ // Count %, check that they're well-formed.
+ n := 0
+ hasPlus := false
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ n++
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ s = s[i:]
+ if len(s) > 3 {
+ s = s[0:3]
+ }
+ return "", EscapeError(s)
+ }
+ i += 3
+ case '+':
+ hasPlus = mode == encodeQueryComponent
+ i++
+ default:
+ i++
+ }
+ }
+
+ if n == 0 && !hasPlus {
+ return s, nil
+ }
+
+ t := make([]byte, len(s)-2*n)
+ j := 0
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
+ j++
+ i += 3
+ case '+':
+ if mode == encodeQueryComponent {
+ t[j] = ' '
+ } else {
+ t[j] = '+'
+ }
+ j++
+ i++
+ default:
+ t[j] = s[i]
+ j++
+ i++
+ }
+ }
+ return string(t), nil
+}
+
+// QueryEscape escapes the string so it can be safely placed
+// inside a URL query.
+func QueryEscape(s string) string {
+ return escape(s, encodeQueryComponent)
+}
+
+func escape(s string, mode encoding) string {
+ spaceCount, hexCount := 0, 0
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if shouldEscape(c, mode) {
+ if c == ' ' && mode == encodeQueryComponent {
+ spaceCount++
+ } else {
+ hexCount++
+ }
+ }
+ }
+
+ if spaceCount == 0 && hexCount == 0 {
+ return s
+ }
+
+ t := make([]byte, len(s)+2*hexCount)
+ j := 0
+ for i := 0; i < len(s); i++ {
+ switch c := s[i]; {
+ case c == ' ' && mode == encodeQueryComponent:
+ t[j] = '+'
+ j++
+ case shouldEscape(c, mode):
+ t[j] = '%'
+ t[j+1] = "0123456789ABCDEF"[c>>4]
+ t[j+2] = "0123456789ABCDEF"[c&15]
+ j += 3
+ default:
+ t[j] = s[i]
+ j++
+ }
+ }
+ return string(t)
+}
+
+// UnescapeUserinfo parses the RawUserinfo field of a URL
+// as the form user or user:password and unescapes and returns
+// the two halves.
+//
+// This functionality should only be used with legacy web sites.
+// RFC 2396 warns that interpreting Userinfo this way
+// ``is NOT RECOMMENDED, because the passing of authentication
+// information in clear text (such as URI) has proven to be a
+// security risk in almost every case where it has been used.''
+func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) {
+ u, p := split(rawUserinfo, ':', true)
+ if user, err = unescape(u, encodeUserPassword); err != nil {
+ return "", "", err
+ }
+ if password, err = unescape(p, encodeUserPassword); err != nil {
+ return "", "", err
+ }
+ return
+}
+
+// EscapeUserinfo combines user and password in the form
+// user:password (or just user if password is empty) and then
+// escapes it for use as the URL.RawUserinfo field.
+//
+// This functionality should only be used with legacy web sites.
+// RFC 2396 warns that interpreting Userinfo this way
+// ``is NOT RECOMMENDED, because the passing of authentication
+// information in clear text (such as URI) has proven to be a
+// security risk in almost every case where it has been used.''
+func EscapeUserinfo(user, password string) string {
+ raw := escape(user, encodeUserPassword)
+ if password != "" {
+ raw += ":" + escape(password, encodeUserPassword)
+ }
+ return raw
+}
+
+// A URL represents a parsed URL (technically, a URI reference).
+// The general form represented is:
+// scheme://[userinfo@]host/path[?query][#fragment]
+// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format"
+// (special characters must be hex-escaped if not meant to have special meaning).
+// All other fields are logical values; '+' or '%' represent themselves.
+//
+// The various Raw values are supplied in wire format because
+// clients typically have to split them into pieces before further
+// decoding.
+type URL struct {
+ Raw string // the original string
+ Scheme string // scheme
+ RawAuthority string // [userinfo@]host
+ RawUserinfo string // userinfo
+ Host string // host
+ RawPath string // /path[?query][#fragment]
+ Path string // /path
+ OpaquePath bool // path is opaque (unrooted when scheme is present)
+ RawQuery string // query
+ Fragment string // fragment
+}
+
+// Maybe rawurl is of the form scheme:path.
+// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
+// If so, return scheme, path; else return "", rawurl.
+func getscheme(rawurl string) (scheme, path string, err os.Error) {
+ for i := 0; i < len(rawurl); i++ {
+ c := rawurl[i]
+ switch {
+ case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
+ // do nothing
+ case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
+ if i == 0 {
+ return "", rawurl, nil
+ }
+ case c == ':':
+ if i == 0 {
+ return "", "", os.NewError("missing protocol scheme")
+ }
+ return rawurl[0:i], rawurl[i+1:], nil
+ default:
+ // we have encountered an invalid character,
+ // so there is no valid scheme
+ return "", rawurl, nil
+ }
+ }
+ return "", rawurl, nil
+}
+
+// Maybe s is of the form t c u.
+// If so, return t, c u (or t, u if cutc == true).
+// If not, return s, "".
+func split(s string, c byte, cutc bool) (string, string) {
+ for i := 0; i < len(s); i++ {
+ if s[i] == c {
+ if cutc {
+ return s[0:i], s[i+1:]
+ }
+ return s[0:i], s[i:]
+ }
+ }
+ return s, ""
+}
+
+// Parse parses rawurl into a URL structure.
+// The string rawurl is assumed not to have a #fragment suffix.
+// (Web browsers strip #fragment before sending the URL to a web server.)
+// The rawurl may be relative or absolute.
+func Parse(rawurl string) (url *URL, err os.Error) {
+ return parse(rawurl, false)
+}
+
+// ParseRequest parses rawurl into a URL structure. It assumes that
+// rawurl was received from an HTTP request, so the rawurl is interpreted
+// only as an absolute URI or an absolute path.
+// The string rawurl is assumed not to have a #fragment suffix.
+// (Web browsers strip #fragment before sending the URL to a web server.)
+func ParseRequest(rawurl string) (url *URL, err os.Error) {
+ return parse(rawurl, true)
+}
+
+// parse parses a URL from a string in one of two contexts. If
+// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
+// in which case only absolute URLs or path-absolute relative URLs are allowed.
+// If viaRequest is false, all forms of relative URLs are allowed.
+func parse(rawurl string, viaRequest bool) (url *URL, err os.Error) {
+ var (
+ leadingSlash bool
+ path string
+ )
+
+ if rawurl == "" {
+ err = os.NewError("empty url")
+ goto Error
+ }
+ url = new(URL)
+ url.Raw = rawurl
+
+ // Split off possible leading "http:", "mailto:", etc.
+ // Cannot contain escaped characters.
+ if url.Scheme, path, err = getscheme(rawurl); err != nil {
+ goto Error
+ }
+ leadingSlash = strings.HasPrefix(path, "/")
+
+ if url.Scheme != "" && !leadingSlash {
+ // RFC 2396:
+ // Absolute URI (has scheme) with non-rooted path
+ // is uninterpreted. It doesn't even have a ?query.
+ // This is the case that handles mailto:name@example.com.
+ url.RawPath = path
+
+ if url.Path, err = unescape(path, encodeOpaque); err != nil {
+ goto Error
+ }
+ url.OpaquePath = true
+ } else {
+ if viaRequest && !leadingSlash {
+ err = os.NewError("invalid URI for request")
+ goto Error
+ }
+
+ // Split off query before parsing path further.
+ url.RawPath = path
+ path, query := split(path, '?', false)
+ if len(query) > 1 {
+ url.RawQuery = query[1:]
+ }
+
+ // Maybe path is //authority/path
+ if (url.Scheme != "" || !viaRequest) &&
+ strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") {
+ url.RawAuthority, path = split(path[2:], '/', false)
+ url.RawPath = url.RawPath[2+len(url.RawAuthority):]
+ }
+
+ // Split authority into userinfo@host.
+ // If there's no @, split's default is wrong. Check explicitly.
+ var rawHost string
+ if strings.Index(url.RawAuthority, "@") < 0 {
+ rawHost = url.RawAuthority
+ } else {
+ url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true)
+ }
+
+ // We leave RawAuthority only in raw form because clients
+ // of common protocols should be using Userinfo and Host
+ // instead. Clients that wish to use RawAuthority will have to
+ // interpret it themselves: RFC 2396 does not define the meaning.
+
+ if strings.Contains(rawHost, "%") {
+ // Host cannot contain escaped characters.
+ err = os.NewError("hexadecimal escape in host")
+ goto Error
+ }
+ url.Host = rawHost
+
+ if url.Path, err = unescape(path, encodePath); err != nil {
+ goto Error
+ }
+ }
+ return url, nil
+
+Error:
+ return nil, &Error{"parse", rawurl, err}
+
+}
+
+// ParseWithReference is like Parse but allows a trailing #fragment.
+func ParseWithReference(rawurlref string) (url *URL, err os.Error) {
+ // Cut off #frag.
+ rawurl, frag := split(rawurlref, '#', false)
+ if url, err = Parse(rawurl); err != nil {
+ return nil, err
+ }
+ url.Raw += frag
+ url.RawPath += frag
+ if len(frag) > 1 {
+ frag = frag[1:]
+ if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
+ return nil, &Error{"parse", rawurl, err}
+ }
+ }
+ return url, nil
+}
+
+// String reassembles url into a valid URL string.
+//
+// There are redundant fields stored in the URL structure:
+// the String method consults Scheme, Path, Host, RawUserinfo,
+// RawQuery, and Fragment, but not Raw, RawPath or RawAuthority.
+func (url *URL) String() string {
+ result := ""
+ if url.Scheme != "" {
+ result += url.Scheme + ":"
+ }
+ if url.Host != "" || url.RawUserinfo != "" {
+ result += "//"
+ if url.RawUserinfo != "" {
+ // hide the password, if any
+ info := url.RawUserinfo
+ if i := strings.Index(info, ":"); i >= 0 {
+ info = info[0:i] + ":******"
+ }
+ result += info + "@"
+ }
+ result += url.Host
+ }
+ if url.OpaquePath {
+ path := url.Path
+ if strings.HasPrefix(path, "/") {
+ result += "%2f"
+ path = path[1:]
+ }
+ result += escape(path, encodeOpaque)
+ } else {
+ result += escape(url.Path, encodePath)
+ }
+ if url.RawQuery != "" {
+ result += "?" + url.RawQuery
+ }
+ if url.Fragment != "" {
+ result += "#" + escape(url.Fragment, encodeFragment)
+ }
+ return result
+}
+
+// Values maps a string key to a list of values.
+// It is typically used for query parameters and form values.
+// Unlike in the http.Header map, the keys in a Values map
+// are case-sensitive.
+type Values map[string][]string
+
+// Get gets the first value associated with the given key.
+// If there are no values associated with the key, Get returns
+// the empty string. To access multiple values, use the map
+// directly.
+func (v Values) Get(key string) string {
+ if v == nil {
+ return ""
+ }
+ vs, ok := v[key]
+ if !ok || len(vs) == 0 {
+ return ""
+ }
+ return vs[0]
+}
+
+// Set sets the key to value. It replaces any existing
+// values.
+func (v Values) Set(key, value string) {
+ v[key] = []string{value}
+}
+
+// Add adds the key to value. It appends to any existing
+// values associated with key.
+func (v Values) Add(key, value string) {
+ v[key] = append(v[key], value)
+}
+
+// Del deletes the values associated with key.
+func (v Values) Del(key string) {
+ v[key] = nil, false
+}
+
+// ParseQuery parses the URL-encoded query string and returns
+// a map listing the values specified for each key.
+// ParseQuery always returns a non-nil map containing all the
+// valid query parameters found; err describes the first decoding error
+// encountered, if any.
+func ParseQuery(query string) (m Values, err os.Error) {
+ m = make(Values)
+ err = parseQuery(m, query)
+ return
+}
+
+func parseQuery(m Values, query string) (err os.Error) {
+ for _, kv := range strings.Split(query, "&") {
+ if len(kv) == 0 {
+ continue
+ }
+ kvPair := strings.SplitN(kv, "=", 2)
+
+ var key, value string
+ var e os.Error
+ key, e = QueryUnescape(kvPair[0])
+ if e == nil && len(kvPair) > 1 {
+ value, e = QueryUnescape(kvPair[1])
+ }
+ if e != nil {
+ err = e
+ continue
+ }
+ m[key] = append(m[key], value)
+ }
+ return err
+}
+
+// Encode encodes the values into ``URL encoded'' form.
+// e.g. "foo=bar&bar=baz"
+func (v Values) Encode() string {
+ if v == nil {
+ return ""
+ }
+ parts := make([]string, 0, len(v)) // will be large enough for most uses
+ for k, vs := range v {
+ prefix := QueryEscape(k) + "="
+ for _, v := range vs {
+ parts = append(parts, prefix+QueryEscape(v))
+ }
+ }
+ return strings.Join(parts, "&")
+}
+
+// resolvePath applies special path segments from refs and applies
+// them to base, per RFC 2396.
+func resolvePath(basepath string, refpath string) string {
+ base := strings.Split(basepath, "/")
+ refs := strings.Split(refpath, "/")
+ if len(base) == 0 {
+ base = []string{""}
+ }
+ for idx, ref := range refs {
+ switch {
+ case ref == ".":
+ base[len(base)-1] = ""
+ case ref == "..":
+ newLen := len(base) - 1
+ if newLen < 1 {
+ newLen = 1
+ }
+ base = base[0:newLen]
+ base[len(base)-1] = ""
+ default:
+ if idx == 0 || base[len(base)-1] == "" {
+ base[len(base)-1] = ref
+ } else {
+ base = append(base, ref)
+ }
+ }
+ }
+ return strings.Join(base, "/")
+}
+
+// IsAbs returns true if the URL is absolute.
+func (url *URL) IsAbs() bool {
+ return url.Scheme != ""
+}
+
+// Parse parses a URL in the context of a base URL. The URL in ref
+// may be relative or absolute. Parse returns nil, err on parse
+// failure, otherwise its return value is the same as ResolveReference.
+func (base *URL) Parse(ref string) (*URL, os.Error) {
+ refurl, err := Parse(ref)
+ if err != nil {
+ return nil, err
+ }
+ return base.ResolveReference(refurl), nil
+}
+
+// ResolveReference resolves a URI reference to an absolute URI from
+// an absolute base URI, per RFC 2396 Section 5.2. The URI reference
+// may be relative or absolute. ResolveReference always returns a new
+// URL instance, even if the returned URL is identical to either the
+// base or reference. If ref is an absolute URL, then ResolveReference
+// ignores base and returns a copy of ref.
+func (base *URL) ResolveReference(ref *URL) *URL {
+ url := new(URL)
+ switch {
+ case ref.IsAbs():
+ *url = *ref
+ default:
+ // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
+ *url = *base
+ if ref.RawAuthority != "" {
+ // The "net_path" case.
+ url.RawAuthority = ref.RawAuthority
+ url.Host = ref.Host
+ url.RawUserinfo = ref.RawUserinfo
+ }
+ switch {
+ case url.OpaquePath:
+ url.Path = ref.Path
+ url.RawPath = ref.RawPath
+ url.RawQuery = ref.RawQuery
+ case strings.HasPrefix(ref.Path, "/"):
+ // The "abs_path" case.
+ url.Path = ref.Path
+ url.RawPath = ref.RawPath
+ url.RawQuery = ref.RawQuery
+ default:
+ // The "rel_path" case.
+ path := resolvePath(base.Path, ref.Path)
+ if !strings.HasPrefix(path, "/") {
+ path = "/" + path
+ }
+ url.Path = path
+ url.RawPath = url.Path
+ url.RawQuery = ref.RawQuery
+ if ref.RawQuery != "" {
+ url.RawPath += "?" + url.RawQuery
+ }
+ }
+
+ url.Fragment = ref.Fragment
+ }
+ url.Raw = url.String()
+ return url
+}
+
+// Query parses RawQuery and returns the corresponding values.
+func (u *URL) Query() Values {
+ v, _ := ParseQuery(u.RawQuery)
+ return v
+}
+
+// EncodedPath returns the URL's path in "URL path encoded" form.
+func (u *URL) EncodedPath() string {
+ return escape(u.Path, encodePath)
+}
diff --git a/src/pkg/url/url_test.go b/src/pkg/url/url_test.go
new file mode 100644
index 000000000..af394d4fb
--- /dev/null
+++ b/src/pkg/url/url_test.go
@@ -0,0 +1,698 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package url
+
+import (
+ "fmt"
+ "os"
+ "reflect"
+ "testing"
+)
+
+// TODO(rsc):
+// test Unescape
+// test Escape
+// test Parse
+
+type URLTest struct {
+ in string
+ out *URL
+ roundtrip string // expected result of reserializing the URL; empty means same as "in".
+}
+
+var urltests = []URLTest{
+ // no path
+ {
+ "http://www.google.com",
+ &URL{
+ Raw: "http://www.google.com",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ },
+ "",
+ },
+ // path
+ {
+ "http://www.google.com/",
+ &URL{
+ Raw: "http://www.google.com/",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/",
+ Path: "/",
+ },
+ "",
+ },
+ // path with hex escaping
+ {
+ "http://www.google.com/file%20one%26two",
+ &URL{
+ Raw: "http://www.google.com/file%20one%26two",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/file%20one%26two",
+ Path: "/file one&two",
+ },
+ "http://www.google.com/file%20one&two",
+ },
+ // user
+ {
+ "ftp://webmaster@www.google.com/",
+ &URL{
+ Raw: "ftp://webmaster@www.google.com/",
+ Scheme: "ftp",
+ RawAuthority: "webmaster@www.google.com",
+ RawUserinfo: "webmaster",
+ Host: "www.google.com",
+ RawPath: "/",
+ Path: "/",
+ },
+ "",
+ },
+ // escape sequence in username
+ {
+ "ftp://john%20doe@www.google.com/",
+ &URL{
+ Raw: "ftp://john%20doe@www.google.com/",
+ Scheme: "ftp",
+ RawAuthority: "john%20doe@www.google.com",
+ RawUserinfo: "john%20doe",
+ Host: "www.google.com",
+ RawPath: "/",
+ Path: "/",
+ },
+ "ftp://john%20doe@www.google.com/",
+ },
+ // query
+ {
+ "http://www.google.com/?q=go+language",
+ &URL{
+ Raw: "http://www.google.com/?q=go+language",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/?q=go+language",
+ Path: "/",
+ RawQuery: "q=go+language",
+ },
+ "",
+ },
+ // query with hex escaping: NOT parsed
+ {
+ "http://www.google.com/?q=go%20language",
+ &URL{
+ Raw: "http://www.google.com/?q=go%20language",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/?q=go%20language",
+ Path: "/",
+ RawQuery: "q=go%20language",
+ },
+ "",
+ },
+ // %20 outside query
+ {
+ "http://www.google.com/a%20b?q=c+d",
+ &URL{
+ Raw: "http://www.google.com/a%20b?q=c+d",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/a%20b?q=c+d",
+ Path: "/a b",
+ RawQuery: "q=c+d",
+ },
+ "",
+ },
+ // path without leading /, so no query parsing
+ {
+ "http:www.google.com/?q=go+language",
+ &URL{
+ Raw: "http:www.google.com/?q=go+language",
+ Scheme: "http",
+ RawPath: "www.google.com/?q=go+language",
+ Path: "www.google.com/?q=go+language",
+ OpaquePath: true,
+ },
+ "http:www.google.com/?q=go+language",
+ },
+ // path without leading /, so no query parsing
+ {
+ "http:%2f%2fwww.google.com/?q=go+language",
+ &URL{
+ Raw: "http:%2f%2fwww.google.com/?q=go+language",
+ Scheme: "http",
+ RawPath: "%2f%2fwww.google.com/?q=go+language",
+ Path: "//www.google.com/?q=go+language",
+ OpaquePath: true,
+ },
+ "http:%2f/www.google.com/?q=go+language",
+ },
+ // non-authority
+ {
+ "mailto:/webmaster@golang.org",
+ &URL{
+ Raw: "mailto:/webmaster@golang.org",
+ Scheme: "mailto",
+ RawPath: "/webmaster@golang.org",
+ Path: "/webmaster@golang.org",
+ },
+ "",
+ },
+ // non-authority
+ {
+ "mailto:webmaster@golang.org",
+ &URL{
+ Raw: "mailto:webmaster@golang.org",
+ Scheme: "mailto",
+ RawPath: "webmaster@golang.org",
+ Path: "webmaster@golang.org",
+ OpaquePath: true,
+ },
+ "",
+ },
+ // unescaped :// in query should not create a scheme
+ {
+ "/foo?query=http://bad",
+ &URL{
+ Raw: "/foo?query=http://bad",
+ RawPath: "/foo?query=http://bad",
+ Path: "/foo",
+ RawQuery: "query=http://bad",
+ },
+ "",
+ },
+ // leading // without scheme should create an authority
+ {
+ "//foo",
+ &URL{
+ RawAuthority: "foo",
+ Raw: "//foo",
+ Host: "foo",
+ Scheme: "",
+ RawPath: "",
+ Path: "",
+ },
+ "",
+ },
+ // leading // without scheme, with userinfo, path, and query
+ {
+ "//user@foo/path?a=b",
+ &URL{
+ Raw: "//user@foo/path?a=b",
+ RawAuthority: "user@foo",
+ RawUserinfo: "user",
+ Scheme: "",
+ RawPath: "/path?a=b",
+ Path: "/path",
+ RawQuery: "a=b",
+ Host: "foo",
+ },
+ "",
+ },
+ // Three leading slashes isn't an authority, but doesn't return an error.
+ // (We can't return an error, as this code is also used via
+ // ServeHTTP -> ReadRequest -> Parse, which is arguably a
+ // different URL parsing context, but currently shares the
+ // same codepath)
+ {
+ "///threeslashes",
+ &URL{
+ RawAuthority: "",
+ Raw: "///threeslashes",
+ Host: "",
+ Scheme: "",
+ RawPath: "///threeslashes",
+ Path: "///threeslashes",
+ },
+ "",
+ },
+ {
+ "http://user:password@google.com",
+ &URL{
+ Raw: "http://user:password@google.com",
+ Scheme: "http",
+ RawAuthority: "user:password@google.com",
+ RawUserinfo: "user:password",
+ Host: "google.com",
+ },
+ "http://user:******@google.com",
+ },
+ {
+ "http://user:longerpass@google.com",
+ &URL{
+ Raw: "http://user:longerpass@google.com",
+ Scheme: "http",
+ RawAuthority: "user:longerpass@google.com",
+ RawUserinfo: "user:longerpass",
+ Host: "google.com",
+ },
+ "http://user:******@google.com",
+ },
+}
+
+var urlnofragtests = []URLTest{
+ {
+ "http://www.google.com/?q=go+language#foo",
+ &URL{
+ Raw: "http://www.google.com/?q=go+language#foo",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/?q=go+language#foo",
+ Path: "/",
+ RawQuery: "q=go+language#foo",
+ },
+ "",
+ },
+}
+
+var urlfragtests = []URLTest{
+ {
+ "http://www.google.com/?q=go+language#foo",
+ &URL{
+ Raw: "http://www.google.com/?q=go+language#foo",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/?q=go+language#foo",
+ Path: "/",
+ RawQuery: "q=go+language",
+ Fragment: "foo",
+ },
+ "",
+ },
+ {
+ "http://www.google.com/?q=go+language#foo%26bar",
+ &URL{
+ Raw: "http://www.google.com/?q=go+language#foo%26bar",
+ Scheme: "http",
+ RawAuthority: "www.google.com",
+ Host: "www.google.com",
+ RawPath: "/?q=go+language#foo%26bar",
+ Path: "/",
+ RawQuery: "q=go+language",
+ Fragment: "foo&bar",
+ },
+ "http://www.google.com/?q=go+language#foo&bar",
+ },
+}
+
+// more useful string for debugging than fmt's struct printer
+func ufmt(u *URL) string {
+ return fmt.Sprintf("raw=%q, scheme=%q, rawpath=%q, auth=%q, userinfo=%q, host=%q, path=%q, rawq=%q, frag=%q",
+ u.Raw, u.Scheme, u.RawPath, u.RawAuthority, u.RawUserinfo,
+ u.Host, u.Path, u.RawQuery, u.Fragment)
+}
+
+func DoTest(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) {
+ for _, tt := range tests {
+ u, err := parse(tt.in)
+ if err != nil {
+ t.Errorf("%s(%q) returned error %s", name, tt.in, err)
+ continue
+ }
+ if !reflect.DeepEqual(u, tt.out) {
+ t.Errorf("%s(%q):\n\thave %v\n\twant %v\n",
+ name, tt.in, ufmt(u), ufmt(tt.out))
+ }
+ }
+}
+
+func TestParse(t *testing.T) {
+ DoTest(t, Parse, "Parse", urltests)
+ DoTest(t, Parse, "Parse", urlnofragtests)
+}
+
+func TestParseWithReference(t *testing.T) {
+ DoTest(t, ParseWithReference, "ParseWithReference", urltests)
+ DoTest(t, ParseWithReference, "ParseWithReference", urlfragtests)
+}
+
+const pathThatLooksSchemeRelative = "//not.a.user@not.a.host/just/a/path"
+
+var parseRequestUrlTests = []struct {
+ url string
+ expectedValid bool
+}{
+ {"http://foo.com", true},
+ {"http://foo.com/", true},
+ {"http://foo.com/path", true},
+ {"/", true},
+ {pathThatLooksSchemeRelative, true},
+ {"//not.a.user@%66%6f%6f.com/just/a/path/also", true},
+ {"foo.html", false},
+ {"../dir/", false},
+}
+
+func TestParseRequest(t *testing.T) {
+ for _, test := range parseRequestUrlTests {
+ _, err := ParseRequest(test.url)
+ valid := err == nil
+ if valid != test.expectedValid {
+ t.Errorf("Expected valid=%v for %q; got %v", test.expectedValid, test.url, valid)
+ }
+ }
+
+ url, err := ParseRequest(pathThatLooksSchemeRelative)
+ if err != nil {
+ t.Fatalf("Unexpected error %v", err)
+ }
+ if url.Path != pathThatLooksSchemeRelative {
+ t.Errorf("Expected path %q; got %q", pathThatLooksSchemeRelative, url.Path)
+ }
+}
+
+func DoTestString(t *testing.T, parse func(string) (*URL, os.Error), name string, tests []URLTest) {
+ for _, tt := range tests {
+ u, err := parse(tt.in)
+ if err != nil {
+ t.Errorf("%s(%q) returned error %s", name, tt.in, err)
+ continue
+ }
+ s := u.String()
+ expected := tt.in
+ if len(tt.roundtrip) > 0 {
+ expected = tt.roundtrip
+ }
+ if s != expected {
+ t.Errorf("%s(%q).String() == %q (expected %q)", name, tt.in, s, expected)
+ }
+ }
+}
+
+func TestURLString(t *testing.T) {
+ DoTestString(t, Parse, "Parse", urltests)
+ DoTestString(t, Parse, "Parse", urlnofragtests)
+ DoTestString(t, ParseWithReference, "ParseWithReference", urltests)
+ DoTestString(t, ParseWithReference, "ParseWithReference", urlfragtests)
+}
+
+type EscapeTest struct {
+ in string
+ out string
+ err os.Error
+}
+
+var unescapeTests = []EscapeTest{
+ {
+ "",
+ "",
+ nil,
+ },
+ {
+ "abc",
+ "abc",
+ nil,
+ },
+ {
+ "1%41",
+ "1A",
+ nil,
+ },
+ {
+ "1%41%42%43",
+ "1ABC",
+ nil,
+ },
+ {
+ "%4a",
+ "J",
+ nil,
+ },
+ {
+ "%6F",
+ "o",
+ nil,
+ },
+ {
+ "%", // not enough characters after %
+ "",
+ EscapeError("%"),
+ },
+ {
+ "%a", // not enough characters after %
+ "",
+ EscapeError("%a"),
+ },
+ {
+ "%1", // not enough characters after %
+ "",
+ EscapeError("%1"),
+ },
+ {
+ "123%45%6", // not enough characters after %
+ "",
+ EscapeError("%6"),
+ },
+ {
+ "%zzzzz", // invalid hex digits
+ "",
+ EscapeError("%zz"),
+ },
+}
+
+func TestUnescape(t *testing.T) {
+ for _, tt := range unescapeTests {
+ actual, err := QueryUnescape(tt.in)
+ if actual != tt.out || (err != nil) != (tt.err != nil) {
+ t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", tt.in, actual, err, tt.out, tt.err)
+ }
+ }
+}
+
+var escapeTests = []EscapeTest{
+ {
+ "",
+ "",
+ nil,
+ },
+ {
+ "abc",
+ "abc",
+ nil,
+ },
+ {
+ "one two",
+ "one+two",
+ nil,
+ },
+ {
+ "10%",
+ "10%25",
+ nil,
+ },
+ {
+ " ?&=#+%!<>#\"{}|\\^[]`☺\t",
+ "+%3F%26%3D%23%2B%25!%3C%3E%23%22%7B%7D%7C%5C%5E%5B%5D%60%E2%98%BA%09",
+ nil,
+ },
+}
+
+func TestEscape(t *testing.T) {
+ for _, tt := range escapeTests {
+ actual := QueryEscape(tt.in)
+ if tt.out != actual {
+ t.Errorf("QueryEscape(%q) = %q, want %q", tt.in, actual, tt.out)
+ }
+
+ // for bonus points, verify that escape:unescape is an identity.
+ roundtrip, err := QueryUnescape(actual)
+ if roundtrip != tt.in || err != nil {
+ t.Errorf("QueryUnescape(%q) = %q, %s; want %q, %s", actual, roundtrip, err, tt.in, "[no error]")
+ }
+ }
+}
+
+type UserinfoTest struct {
+ User string
+ Password string
+ Raw string
+}
+
+var userinfoTests = []UserinfoTest{
+ {"user", "password", "user:password"},
+ {"foo:bar", "~!@#$%^&*()_+{}|[]\\-=`:;'\"<>?,./",
+ "foo%3Abar:~!%40%23$%25%5E&*()_+%7B%7D%7C%5B%5D%5C-=%60%3A;'%22%3C%3E?,.%2F"},
+}
+
+func TestEscapeUserinfo(t *testing.T) {
+ for _, tt := range userinfoTests {
+ if raw := EscapeUserinfo(tt.User, tt.Password); raw != tt.Raw {
+ t.Errorf("EscapeUserinfo(%q, %q) = %q, want %q", tt.User, tt.Password, raw, tt.Raw)
+ }
+ }
+}
+
+func TestUnescapeUserinfo(t *testing.T) {
+ for _, tt := range userinfoTests {
+ if user, pass, err := UnescapeUserinfo(tt.Raw); user != tt.User || pass != tt.Password || err != nil {
+ t.Errorf("UnescapeUserinfo(%q) = %q, %q, %v, want %q, %q, nil", tt.Raw, user, pass, err, tt.User, tt.Password)
+ }
+ }
+}
+
+type EncodeQueryTest struct {
+ m Values
+ expected string
+ expected1 string
+}
+
+var encodeQueryTests = []EncodeQueryTest{
+ {nil, "", ""},
+ {Values{"q": {"puppies"}, "oe": {"utf8"}}, "q=puppies&oe=utf8", "oe=utf8&q=puppies"},
+ {Values{"q": {"dogs", "&", "7"}}, "q=dogs&q=%26&q=7", "q=dogs&q=%26&q=7"},
+}
+
+func TestEncodeQuery(t *testing.T) {
+ for _, tt := range encodeQueryTests {
+ if q := tt.m.Encode(); q != tt.expected && q != tt.expected1 {
+ t.Errorf(`EncodeQuery(%+v) = %q, want %q`, tt.m, q, tt.expected)
+ }
+ }
+}
+
+var resolvePathTests = []struct {
+ base, ref, expected string
+}{
+ {"a/b", ".", "a/"},
+ {"a/b", "c", "a/c"},
+ {"a/b", "..", ""},
+ {"a/", "..", ""},
+ {"a/", "../..", ""},
+ {"a/b/c", "..", "a/"},
+ {"a/b/c", "../d", "a/d"},
+ {"a/b/c", ".././d", "a/d"},
+ {"a/b", "./..", ""},
+ {"a/./b", ".", "a/./"},
+ {"a/../", ".", "a/../"},
+ {"a/.././b", "c", "a/.././c"},
+}
+
+func TestResolvePath(t *testing.T) {
+ for _, test := range resolvePathTests {
+ got := resolvePath(test.base, test.ref)
+ if got != test.expected {
+ t.Errorf("For %q + %q got %q; expected %q", test.base, test.ref, got, test.expected)
+ }
+ }
+}
+
+var resolveReferenceTests = []struct {
+ base, rel, expected string
+}{
+ // Absolute URL references
+ {"http://foo.com?a=b", "https://bar.com/", "https://bar.com/"},
+ {"http://foo.com/", "https://bar.com/?a=b", "https://bar.com/?a=b"},
+ {"http://foo.com/bar", "mailto:foo@example.com", "mailto:foo@example.com"},
+
+ // Path-absolute references
+ {"http://foo.com/bar", "/baz", "http://foo.com/baz"},
+ {"http://foo.com/bar?a=b#f", "/baz", "http://foo.com/baz"},
+ {"http://foo.com/bar?a=b", "/baz?c=d", "http://foo.com/baz?c=d"},
+
+ // Scheme-relative
+ {"https://foo.com/bar?a=b", "//bar.com/quux", "https://bar.com/quux"},
+
+ // Path-relative references:
+
+ // ... current directory
+ {"http://foo.com", ".", "http://foo.com/"},
+ {"http://foo.com/bar", ".", "http://foo.com/"},
+ {"http://foo.com/bar/", ".", "http://foo.com/bar/"},
+
+ // ... going down
+ {"http://foo.com", "bar", "http://foo.com/bar"},
+ {"http://foo.com/", "bar", "http://foo.com/bar"},
+ {"http://foo.com/bar/baz", "quux", "http://foo.com/bar/quux"},
+
+ // ... going up
+ {"http://foo.com/bar/baz", "../quux", "http://foo.com/quux"},
+ {"http://foo.com/bar/baz", "../../../../../quux", "http://foo.com/quux"},
+ {"http://foo.com/bar", "..", "http://foo.com/"},
+ {"http://foo.com/bar/baz", "./..", "http://foo.com/"},
+
+ // "." and ".." in the base aren't special
+ {"http://foo.com/dot/./dotdot/../foo/bar", "../baz", "http://foo.com/dot/./dotdot/../baz"},
+
+ // Triple dot isn't special
+ {"http://foo.com/bar", "...", "http://foo.com/..."},
+
+ // Fragment
+ {"http://foo.com/bar", ".#frag", "http://foo.com/#frag"},
+}
+
+func TestResolveReference(t *testing.T) {
+ mustParse := func(url string) *URL {
+ u, err := ParseWithReference(url)
+ if err != nil {
+ t.Fatalf("Expected URL to parse: %q, got error: %v", url, err)
+ }
+ return u
+ }
+ for _, test := range resolveReferenceTests {
+ base := mustParse(test.base)
+ rel := mustParse(test.rel)
+ url := base.ResolveReference(rel)
+ urlStr := url.String()
+ if urlStr != test.expected {
+ t.Errorf("Resolving %q + %q != %q; got %q", test.base, test.rel, test.expected, urlStr)
+ }
+ }
+
+ // Test that new instances are returned.
+ base := mustParse("http://foo.com/")
+ abs := base.ResolveReference(mustParse("."))
+ if base == abs {
+ t.Errorf("Expected no-op reference to return new URL instance.")
+ }
+ barRef := mustParse("http://bar.com/")
+ abs = base.ResolveReference(barRef)
+ if abs == barRef {
+ t.Errorf("Expected resolution of absolute reference to return new URL instance.")
+ }
+
+ // Test the convenience wrapper too
+ base = mustParse("http://foo.com/path/one/")
+ abs, _ = base.Parse("../two")
+ expected := "http://foo.com/path/two"
+ if abs.String() != expected {
+ t.Errorf("Parse wrapper got %q; expected %q", abs.String(), expected)
+ }
+ _, err := base.Parse("")
+ if err == nil {
+ t.Errorf("Expected an error from Parse wrapper parsing an empty string.")
+ }
+
+}
+
+func TestQueryValues(t *testing.T) {
+ u, _ := Parse("http://x.com?foo=bar&bar=1&bar=2")
+ v := u.Query()
+ if len(v) != 2 {
+ t.Errorf("got %d keys in Query values, want 2", len(v))
+ }
+ if g, e := v.Get("foo"), "bar"; g != e {
+ t.Errorf("Get(foo) = %q, want %q", g, e)
+ }
+ // Case sensitive:
+ if g, e := v.Get("Foo"), ""; g != e {
+ t.Errorf("Get(Foo) = %q, want %q", g, e)
+ }
+ if g, e := v.Get("bar"), "1"; g != e {
+ t.Errorf("Get(bar) = %q, want %q", g, e)
+ }
+ if g, e := v.Get("baz"), ""; g != e {
+ t.Errorf("Get(baz) = %q, want %q", g, e)
+ }
+ v.Del("bar")
+ if g, e := v.Get("bar"), ""; g != e {
+ t.Errorf("second Get(bar) = %q, want %q", g, e)
+ }
+}