diff options
Diffstat (limited to 'src/pkg/http/url.go')
-rw-r--r-- | src/pkg/http/url.go | 608 |
1 files changed, 0 insertions, 608 deletions
diff --git a/src/pkg/http/url.go b/src/pkg/http/url.go deleted file mode 100644 index e934b27c4..000000000 --- a/src/pkg/http/url.go +++ /dev/null @@ -1,608 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Parse URLs (actually URIs, but that seems overly pedantic). -// RFC 3986 - -package http - -import ( - "os" - "strconv" - "strings" -) - -// URLError reports an error and the operation and URL that caused it. -type URLError struct { - Op string - URL string - Error os.Error -} - -func (e *URLError) String() string { return e.Op + " " + e.URL + ": " + e.Error.String() } - -func ishex(c byte) bool { - switch { - case '0' <= c && c <= '9': - return true - case 'a' <= c && c <= 'f': - return true - case 'A' <= c && c <= 'F': - return true - } - return false -} - -func unhex(c byte) byte { - switch { - case '0' <= c && c <= '9': - return c - '0' - case 'a' <= c && c <= 'f': - return c - 'a' + 10 - case 'A' <= c && c <= 'F': - return c - 'A' + 10 - } - return 0 -} - -type encoding int - -const ( - encodePath encoding = 1 + iota - encodeUserPassword - encodeQueryComponent - encodeFragment - encodeOpaque -) - - -type URLEscapeError string - -func (e URLEscapeError) String() string { - return "invalid URL escape " + strconv.Quote(string(e)) -} - -// Return true if the specified character should be escaped when -// appearing in a URL string, according to RFC 2396. -// When 'all' is true the full range of reserved characters are matched. -func shouldEscape(c byte, mode encoding) bool { - // RFC 2396 §2.3 Unreserved characters (alphanum) - if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { - return false - } - switch c { - case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark) - return false - - case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) - // Different sections of the URL allow a few of - // the reserved characters to appear unescaped. - switch mode { - case encodePath: // §3.3 - // The RFC allows : @ & = + $ , but saves / ; for assigning - // meaning to individual path segments. This package - // only manipulates the path as a whole, so we allow those - // last two as well. Clients that need to distinguish between - // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath. - // That leaves only ? to escape. - return c == '?' - - case encodeUserPassword: // §3.2.2 - // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /. - // The parsing of userinfo treats : as special so we must escape that too. - return c == '@' || c == '/' || c == ':' - - case encodeQueryComponent: // §3.4 - // The RFC reserves (so we must escape) everything. - return true - - case encodeFragment: // §4.1 - // The RFC text is silent but the grammar allows - // everything, so escape nothing. - return false - - case encodeOpaque: // §3 opaque_part - // The RFC allows opaque_part to use all characters - // except that the leading / must be escaped. - // (We implement that case in String.) - return false - } - } - - // Everything else must be escaped. - return true -} - - -// URLUnescape unescapes a string in ``URL encoded'' form, -// converting %AB into the byte 0xAB and '+' into ' ' (space). -// It returns an error if any % is not followed -// by two hexadecimal digits. -// Despite the name, this encoding applies only to individual -// components of the query portion of the URL. -func URLUnescape(s string) (string, os.Error) { - return urlUnescape(s, encodeQueryComponent) -} - -// urlUnescape is like URLUnescape but mode specifies -// which section of the URL is being unescaped. -func urlUnescape(s string, mode encoding) (string, os.Error) { - // Count %, check that they're well-formed. - n := 0 - hasPlus := false - for i := 0; i < len(s); { - switch s[i] { - case '%': - n++ - if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { - s = s[i:] - if len(s) > 3 { - s = s[0:3] - } - return "", URLEscapeError(s) - } - i += 3 - case '+': - hasPlus = mode == encodeQueryComponent - i++ - default: - i++ - } - } - - if n == 0 && !hasPlus { - return s, nil - } - - t := make([]byte, len(s)-2*n) - j := 0 - for i := 0; i < len(s); { - switch s[i] { - case '%': - t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) - j++ - i += 3 - case '+': - if mode == encodeQueryComponent { - t[j] = ' ' - } else { - t[j] = '+' - } - j++ - i++ - default: - t[j] = s[i] - j++ - i++ - } - } - return string(t), nil -} - -// URLEscape converts a string into ``URL encoded'' form. -// Despite the name, this encoding applies only to individual -// components of the query portion of the URL. -func URLEscape(s string) string { - return urlEscape(s, encodeQueryComponent) -} - -func urlEscape(s string, mode encoding) string { - spaceCount, hexCount := 0, 0 - for i := 0; i < len(s); i++ { - c := s[i] - if shouldEscape(c, mode) { - if c == ' ' && mode == encodeQueryComponent { - spaceCount++ - } else { - hexCount++ - } - } - } - - if spaceCount == 0 && hexCount == 0 { - return s - } - - t := make([]byte, len(s)+2*hexCount) - j := 0 - for i := 0; i < len(s); i++ { - switch c := s[i]; { - case c == ' ' && mode == encodeQueryComponent: - t[j] = '+' - j++ - case shouldEscape(c, mode): - t[j] = '%' - t[j+1] = "0123456789ABCDEF"[c>>4] - t[j+2] = "0123456789ABCDEF"[c&15] - j += 3 - default: - t[j] = s[i] - j++ - } - } - return string(t) -} - -// UnescapeUserinfo parses the RawUserinfo field of a URL -// as the form user or user:password and unescapes and returns -// the two halves. -// -// This functionality should only be used with legacy web sites. -// RFC 2396 warns that interpreting Userinfo this way -// ``is NOT RECOMMENDED, because the passing of authentication -// information in clear text (such as URI) has proven to be a -// security risk in almost every case where it has been used.'' -func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) { - u, p := split(rawUserinfo, ':', true) - if user, err = urlUnescape(u, encodeUserPassword); err != nil { - return "", "", err - } - if password, err = urlUnescape(p, encodeUserPassword); err != nil { - return "", "", err - } - return -} - -// EscapeUserinfo combines user and password in the form -// user:password (or just user if password is empty) and then -// escapes it for use as the URL.RawUserinfo field. -// -// This functionality should only be used with legacy web sites. -// RFC 2396 warns that interpreting Userinfo this way -// ``is NOT RECOMMENDED, because the passing of authentication -// information in clear text (such as URI) has proven to be a -// security risk in almost every case where it has been used.'' -func EscapeUserinfo(user, password string) string { - raw := urlEscape(user, encodeUserPassword) - if password != "" { - raw += ":" + urlEscape(password, encodeUserPassword) - } - return raw -} - -// A URL represents a parsed URL (technically, a URI reference). -// The general form represented is: -// scheme://[userinfo@]host/path[?query][#fragment] -// The Raw, RawAuthority, RawPath, and RawQuery fields are in "wire format" -// (special characters must be hex-escaped if not meant to have special meaning). -// All other fields are logical values; '+' or '%' represent themselves. -// -// The various Raw values are supplied in wire format because -// clients typically have to split them into pieces before further -// decoding. -type URL struct { - Raw string // the original string - Scheme string // scheme - RawAuthority string // [userinfo@]host - RawUserinfo string // userinfo - Host string // host - RawPath string // /path[?query][#fragment] - Path string // /path - OpaquePath bool // path is opaque (unrooted when scheme is present) - RawQuery string // query - Fragment string // fragment -} - -// Maybe rawurl is of the form scheme:path. -// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*) -// If so, return scheme, path; else return "", rawurl. -func getscheme(rawurl string) (scheme, path string, err os.Error) { - for i := 0; i < len(rawurl); i++ { - c := rawurl[i] - switch { - case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': - // do nothing - case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': - if i == 0 { - return "", rawurl, nil - } - case c == ':': - if i == 0 { - return "", "", os.NewError("missing protocol scheme") - } - return rawurl[0:i], rawurl[i+1:], nil - default: - // we have encountered an invalid character, - // so there is no valid scheme - return "", rawurl, nil - } - } - return "", rawurl, nil -} - -// Maybe s is of the form t c u. -// If so, return t, c u (or t, u if cutc == true). -// If not, return s, "". -func split(s string, c byte, cutc bool) (string, string) { - for i := 0; i < len(s); i++ { - if s[i] == c { - if cutc { - return s[0:i], s[i+1:] - } - return s[0:i], s[i:] - } - } - return s, "" -} - -// ParseURL parses rawurl into a URL structure. -// The string rawurl is assumed not to have a #fragment suffix. -// (Web browsers strip #fragment before sending the URL to a web server.) -// The rawurl may be relative or absolute. -func ParseURL(rawurl string) (url *URL, err os.Error) { - return parseURL(rawurl, false) -} - -// ParseRequestURL parses rawurl into a URL structure. It assumes that -// rawurl was received from an HTTP request, so the rawurl is interpreted -// only as an absolute URI or an absolute path. -// The string rawurl is assumed not to have a #fragment suffix. -// (Web browsers strip #fragment before sending the URL to a web server.) -func ParseRequestURL(rawurl string) (url *URL, err os.Error) { - return parseURL(rawurl, true) -} - -// parseURL parses a URL from a string in one of two contexts. If -// viaRequest is true, the URL is assumed to have arrived via an HTTP request, -// in which case only absolute URLs or path-absolute relative URLs are allowed. -// If viaRequest is false, all forms of relative URLs are allowed. -func parseURL(rawurl string, viaRequest bool) (url *URL, err os.Error) { - var ( - leadingSlash bool - path string - ) - - if rawurl == "" { - err = os.NewError("empty url") - goto Error - } - url = new(URL) - url.Raw = rawurl - - // Split off possible leading "http:", "mailto:", etc. - // Cannot contain escaped characters. - if url.Scheme, path, err = getscheme(rawurl); err != nil { - goto Error - } - leadingSlash = strings.HasPrefix(path, "/") - - if url.Scheme != "" && !leadingSlash { - // RFC 2396: - // Absolute URI (has scheme) with non-rooted path - // is uninterpreted. It doesn't even have a ?query. - // This is the case that handles mailto:name@example.com. - url.RawPath = path - - if url.Path, err = urlUnescape(path, encodeOpaque); err != nil { - goto Error - } - url.OpaquePath = true - } else { - if viaRequest && !leadingSlash { - err = os.NewError("invalid URI for request") - goto Error - } - - // Split off query before parsing path further. - url.RawPath = path - path, query := split(path, '?', false) - if len(query) > 1 { - url.RawQuery = query[1:] - } - - // Maybe path is //authority/path - if (url.Scheme != "" || !viaRequest) && - strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") { - url.RawAuthority, path = split(path[2:], '/', false) - url.RawPath = url.RawPath[2+len(url.RawAuthority):] - } - - // Split authority into userinfo@host. - // If there's no @, split's default is wrong. Check explicitly. - var rawHost string - if strings.Index(url.RawAuthority, "@") < 0 { - rawHost = url.RawAuthority - } else { - url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true) - } - - // We leave RawAuthority only in raw form because clients - // of common protocols should be using Userinfo and Host - // instead. Clients that wish to use RawAuthority will have to - // interpret it themselves: RFC 2396 does not define the meaning. - - if strings.Contains(rawHost, "%") { - // Host cannot contain escaped characters. - err = os.NewError("hexadecimal escape in host") - goto Error - } - url.Host = rawHost - - if url.Path, err = urlUnescape(path, encodePath); err != nil { - goto Error - } - } - return url, nil - -Error: - return nil, &URLError{"parse", rawurl, err} - -} - -// ParseURLReference is like ParseURL but allows a trailing #fragment. -func ParseURLReference(rawurlref string) (url *URL, err os.Error) { - // Cut off #frag. - rawurl, frag := split(rawurlref, '#', false) - if url, err = ParseURL(rawurl); err != nil { - return nil, err - } - url.Raw += frag - url.RawPath += frag - if len(frag) > 1 { - frag = frag[1:] - if url.Fragment, err = urlUnescape(frag, encodeFragment); err != nil { - return nil, &URLError{"parse", rawurl, err} - } - } - return url, nil -} - -// String reassembles url into a valid URL string. -// -// There are redundant fields stored in the URL structure: -// the String method consults Scheme, Path, Host, RawUserinfo, -// RawQuery, and Fragment, but not Raw, RawPath or RawAuthority. -func (url *URL) String() string { - result := "" - if url.Scheme != "" { - result += url.Scheme + ":" - } - if url.Host != "" || url.RawUserinfo != "" { - result += "//" - if url.RawUserinfo != "" { - // hide the password, if any - info := url.RawUserinfo - if i := strings.Index(info, ":"); i >= 0 { - info = info[0:i] + ":******" - } - result += info + "@" - } - result += url.Host - } - if url.OpaquePath { - path := url.Path - if strings.HasPrefix(path, "/") { - result += "%2f" - path = path[1:] - } - result += urlEscape(path, encodeOpaque) - } else { - result += urlEscape(url.Path, encodePath) - } - if url.RawQuery != "" { - result += "?" + url.RawQuery - } - if url.Fragment != "" { - result += "#" + urlEscape(url.Fragment, encodeFragment) - } - return result -} - -// Encode encodes the values into ``URL encoded'' form. -// e.g. "foo=bar&bar=baz" -func (v Values) Encode() string { - if v == nil { - return "" - } - parts := make([]string, 0, len(v)) // will be large enough for most uses - for k, vs := range v { - prefix := URLEscape(k) + "=" - for _, v := range vs { - parts = append(parts, prefix+URLEscape(v)) - } - } - return strings.Join(parts, "&") -} - -// resolvePath applies special path segments from refs and applies -// them to base, per RFC 2396. -func resolvePath(basepath string, refpath string) string { - base := strings.Split(basepath, "/") - refs := strings.Split(refpath, "/") - if len(base) == 0 { - base = []string{""} - } - for idx, ref := range refs { - switch { - case ref == ".": - base[len(base)-1] = "" - case ref == "..": - newLen := len(base) - 1 - if newLen < 1 { - newLen = 1 - } - base = base[0:newLen] - base[len(base)-1] = "" - default: - if idx == 0 || base[len(base)-1] == "" { - base[len(base)-1] = ref - } else { - base = append(base, ref) - } - } - } - return strings.Join(base, "/") -} - -// IsAbs returns true if the URL is absolute. -func (url *URL) IsAbs() bool { - return url.Scheme != "" -} - -// ParseURL parses a URL in the context of a base URL. The URL in ref -// may be relative or absolute. ParseURL returns nil, err on parse -// failure, otherwise its return value is the same as ResolveReference. -func (base *URL) ParseURL(ref string) (*URL, os.Error) { - refurl, err := ParseURL(ref) - if err != nil { - return nil, err - } - return base.ResolveReference(refurl), nil -} - -// ResolveReference resolves a URI reference to an absolute URI from -// an absolute base URI, per RFC 2396 Section 5.2. The URI reference -// may be relative or absolute. ResolveReference always returns a new -// URL instance, even if the returned URL is identical to either the -// base or reference. If ref is an absolute URL, then ResolveReference -// ignores base and returns a copy of ref. -func (base *URL) ResolveReference(ref *URL) *URL { - url := new(URL) - switch { - case ref.IsAbs(): - *url = *ref - default: - // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] - *url = *base - if ref.RawAuthority != "" { - // The "net_path" case. - url.RawAuthority = ref.RawAuthority - url.Host = ref.Host - url.RawUserinfo = ref.RawUserinfo - } - switch { - case url.OpaquePath: - url.Path = ref.Path - url.RawPath = ref.RawPath - url.RawQuery = ref.RawQuery - case strings.HasPrefix(ref.Path, "/"): - // The "abs_path" case. - url.Path = ref.Path - url.RawPath = ref.RawPath - url.RawQuery = ref.RawQuery - default: - // The "rel_path" case. - path := resolvePath(base.Path, ref.Path) - if !strings.HasPrefix(path, "/") { - path = "/" + path - } - url.Path = path - url.RawPath = url.Path - url.RawQuery = ref.RawQuery - if ref.RawQuery != "" { - url.RawPath += "?" + url.RawQuery - } - } - - url.Fragment = ref.Fragment - } - url.Raw = url.String() - return url -} - -// Query parses RawQuery and returns the corresponding values. -func (u *URL) Query() Values { - v, _ := ParseQuery(u.RawQuery) - return v -} |