summaryrefslogtreecommitdiff
path: root/src/net/url/url.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/net/url/url.go')
-rw-r--r--src/net/url/url.go719
1 files changed, 719 insertions, 0 deletions
diff --git a/src/net/url/url.go b/src/net/url/url.go
new file mode 100644
index 000000000..f167408fa
--- /dev/null
+++ b/src/net/url/url.go
@@ -0,0 +1,719 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package url parses URLs and implements query escaping.
+// See RFC 3986.
+package url
+
+import (
+ "bytes"
+ "errors"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+// Error reports an error and the operation and URL that caused it.
+type Error struct {
+ Op string
+ URL string
+ Err error
+}
+
+func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() }
+
+func ishex(c byte) bool {
+ switch {
+ case '0' <= c && c <= '9':
+ return true
+ case 'a' <= c && c <= 'f':
+ return true
+ case 'A' <= c && c <= 'F':
+ return true
+ }
+ return false
+}
+
+func unhex(c byte) byte {
+ switch {
+ case '0' <= c && c <= '9':
+ return c - '0'
+ case 'a' <= c && c <= 'f':
+ return c - 'a' + 10
+ case 'A' <= c && c <= 'F':
+ return c - 'A' + 10
+ }
+ return 0
+}
+
+type encoding int
+
+const (
+ encodePath encoding = 1 + iota
+ encodeUserPassword
+ encodeQueryComponent
+ encodeFragment
+)
+
+type EscapeError string
+
+func (e EscapeError) Error() string {
+ return "invalid URL escape " + strconv.Quote(string(e))
+}
+
+// Return true if the specified character should be escaped when
+// appearing in a URL string, according to RFC 3986.
+func shouldEscape(c byte, mode encoding) bool {
+ // §2.3 Unreserved characters (alphanum)
+ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
+ return false
+ }
+
+ switch c {
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark)
+ return false
+
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
+ // Different sections of the URL allow a few of
+ // the reserved characters to appear unescaped.
+ switch mode {
+ case encodePath: // §3.3
+ // The RFC allows : @ & = + $ but saves / ; , for assigning
+ // meaning to individual path segments. This package
+ // only manipulates the path as a whole, so we allow those
+ // last two as well. That leaves only ? to escape.
+ return c == '?'
+
+ case encodeUserPassword: // §3.2.1
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in
+ // userinfo, so we must escape only '@', '/', and '?'.
+ // The parsing of userinfo treats ':' as special so we must escape
+ // that too.
+ return c == '@' || c == '/' || c == '?' || c == ':'
+
+ case encodeQueryComponent: // §3.4
+ // The RFC reserves (so we must escape) everything.
+ return true
+
+ case encodeFragment: // §4.1
+ // The RFC text is silent but the grammar allows
+ // everything, so escape nothing.
+ return false
+ }
+ }
+
+ // Everything else must be escaped.
+ return true
+}
+
+// QueryUnescape does the inverse transformation of QueryEscape, converting
+// %AB into the byte 0xAB and '+' into ' ' (space). It returns an error if
+// any % is not followed by two hexadecimal digits.
+func QueryUnescape(s string) (string, error) {
+ return unescape(s, encodeQueryComponent)
+}
+
+// unescape unescapes a string; the mode specifies
+// which section of the URL string is being unescaped.
+func unescape(s string, mode encoding) (string, error) {
+ // Count %, check that they're well-formed.
+ n := 0
+ hasPlus := false
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ n++
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
+ s = s[i:]
+ if len(s) > 3 {
+ s = s[0:3]
+ }
+ return "", EscapeError(s)
+ }
+ i += 3
+ case '+':
+ hasPlus = mode == encodeQueryComponent
+ i++
+ default:
+ i++
+ }
+ }
+
+ if n == 0 && !hasPlus {
+ return s, nil
+ }
+
+ t := make([]byte, len(s)-2*n)
+ j := 0
+ for i := 0; i < len(s); {
+ switch s[i] {
+ case '%':
+ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
+ j++
+ i += 3
+ case '+':
+ if mode == encodeQueryComponent {
+ t[j] = ' '
+ } else {
+ t[j] = '+'
+ }
+ j++
+ i++
+ default:
+ t[j] = s[i]
+ j++
+ i++
+ }
+ }
+ return string(t), nil
+}
+
+// QueryEscape escapes the string so it can be safely placed
+// inside a URL query.
+func QueryEscape(s string) string {
+ return escape(s, encodeQueryComponent)
+}
+
+func escape(s string, mode encoding) string {
+ spaceCount, hexCount := 0, 0
+ for i := 0; i < len(s); i++ {
+ c := s[i]
+ if shouldEscape(c, mode) {
+ if c == ' ' && mode == encodeQueryComponent {
+ spaceCount++
+ } else {
+ hexCount++
+ }
+ }
+ }
+
+ if spaceCount == 0 && hexCount == 0 {
+ return s
+ }
+
+ t := make([]byte, len(s)+2*hexCount)
+ j := 0
+ for i := 0; i < len(s); i++ {
+ switch c := s[i]; {
+ case c == ' ' && mode == encodeQueryComponent:
+ t[j] = '+'
+ j++
+ case shouldEscape(c, mode):
+ t[j] = '%'
+ t[j+1] = "0123456789ABCDEF"[c>>4]
+ t[j+2] = "0123456789ABCDEF"[c&15]
+ j += 3
+ default:
+ t[j] = s[i]
+ j++
+ }
+ }
+ return string(t)
+}
+
+// A URL represents a parsed URL (technically, a URI reference).
+// The general form represented is:
+//
+// scheme://[userinfo@]host/path[?query][#fragment]
+//
+// URLs that do not start with a slash after the scheme are interpreted as:
+//
+// scheme:opaque[?query][#fragment]
+//
+// Note that the Path field is stored in decoded form: /%47%6f%2f becomes /Go/.
+// A consequence is that it is impossible to tell which slashes in the Path were
+// slashes in the raw URL and which were %2f. This distinction is rarely important,
+// but when it is a client must use other routines to parse the raw URL or construct
+// the parsed URL. For example, an HTTP server can consult req.RequestURI, and
+// an HTTP client can use URL{Host: "example.com", Opaque: "//example.com/Go%2f"}
+// instead of URL{Host: "example.com", Path: "/Go/"}.
+type URL struct {
+ Scheme string
+ Opaque string // encoded opaque data
+ User *Userinfo // username and password information
+ Host string // host or host:port
+ Path string
+ RawQuery string // encoded query values, without '?'
+ Fragment string // fragment for references, without '#'
+}
+
+// User returns a Userinfo containing the provided username
+// and no password set.
+func User(username string) *Userinfo {
+ return &Userinfo{username, "", false}
+}
+
+// UserPassword returns a Userinfo containing the provided username
+// and password.
+// This functionality should only be used with legacy web sites.
+// RFC 2396 warns that interpreting Userinfo this way
+// ``is NOT RECOMMENDED, because the passing of authentication
+// information in clear text (such as URI) has proven to be a
+// security risk in almost every case where it has been used.''
+func UserPassword(username, password string) *Userinfo {
+ return &Userinfo{username, password, true}
+}
+
+// The Userinfo type is an immutable encapsulation of username and
+// password details for a URL. An existing Userinfo value is guaranteed
+// to have a username set (potentially empty, as allowed by RFC 2396),
+// and optionally a password.
+type Userinfo struct {
+ username string
+ password string
+ passwordSet bool
+}
+
+// Username returns the username.
+func (u *Userinfo) Username() string {
+ return u.username
+}
+
+// Password returns the password in case it is set, and whether it is set.
+func (u *Userinfo) Password() (string, bool) {
+ if u.passwordSet {
+ return u.password, true
+ }
+ return "", false
+}
+
+// String returns the encoded userinfo information in the standard form
+// of "username[:password]".
+func (u *Userinfo) String() string {
+ s := escape(u.username, encodeUserPassword)
+ if u.passwordSet {
+ s += ":" + escape(u.password, encodeUserPassword)
+ }
+ return s
+}
+
+// Maybe rawurl is of the form scheme:path.
+// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
+// If so, return scheme, path; else return "", rawurl.
+func getscheme(rawurl string) (scheme, path string, err error) {
+ for i := 0; i < len(rawurl); i++ {
+ c := rawurl[i]
+ switch {
+ case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
+ // do nothing
+ case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
+ if i == 0 {
+ return "", rawurl, nil
+ }
+ case c == ':':
+ if i == 0 {
+ return "", "", errors.New("missing protocol scheme")
+ }
+ return rawurl[0:i], rawurl[i+1:], nil
+ default:
+ // we have encountered an invalid character,
+ // so there is no valid scheme
+ return "", rawurl, nil
+ }
+ }
+ return "", rawurl, nil
+}
+
+// Maybe s is of the form t c u.
+// If so, return t, c u (or t, u if cutc == true).
+// If not, return s, "".
+func split(s string, c string, cutc bool) (string, string) {
+ i := strings.Index(s, c)
+ if i < 0 {
+ return s, ""
+ }
+ if cutc {
+ return s[0:i], s[i+len(c):]
+ }
+ return s[0:i], s[i:]
+}
+
+// Parse parses rawurl into a URL structure.
+// The rawurl may be relative or absolute.
+func Parse(rawurl string) (url *URL, err error) {
+ // Cut off #frag
+ u, frag := split(rawurl, "#", true)
+ if url, err = parse(u, false); err != nil {
+ return nil, err
+ }
+ if frag == "" {
+ return url, nil
+ }
+ if url.Fragment, err = unescape(frag, encodeFragment); err != nil {
+ return nil, &Error{"parse", rawurl, err}
+ }
+ return url, nil
+}
+
+// ParseRequestURI parses rawurl into a URL structure. It assumes that
+// rawurl was received in an HTTP request, so the rawurl is interpreted
+// only as an absolute URI or an absolute path.
+// The string rawurl is assumed not to have a #fragment suffix.
+// (Web browsers strip #fragment before sending the URL to a web server.)
+func ParseRequestURI(rawurl string) (url *URL, err error) {
+ return parse(rawurl, true)
+}
+
+// parse parses a URL from a string in one of two contexts. If
+// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
+// in which case only absolute URLs or path-absolute relative URLs are allowed.
+// If viaRequest is false, all forms of relative URLs are allowed.
+func parse(rawurl string, viaRequest bool) (url *URL, err error) {
+ var rest string
+
+ if rawurl == "" && viaRequest {
+ err = errors.New("empty url")
+ goto Error
+ }
+ url = new(URL)
+
+ if rawurl == "*" {
+ url.Path = "*"
+ return
+ }
+
+ // Split off possible leading "http:", "mailto:", etc.
+ // Cannot contain escaped characters.
+ if url.Scheme, rest, err = getscheme(rawurl); err != nil {
+ goto Error
+ }
+ url.Scheme = strings.ToLower(url.Scheme)
+
+ rest, url.RawQuery = split(rest, "?", true)
+
+ if !strings.HasPrefix(rest, "/") {
+ if url.Scheme != "" {
+ // We consider rootless paths per RFC 3986 as opaque.
+ url.Opaque = rest
+ return url, nil
+ }
+ if viaRequest {
+ err = errors.New("invalid URI for request")
+ goto Error
+ }
+ }
+
+ if (url.Scheme != "" || !viaRequest && !strings.HasPrefix(rest, "///")) && strings.HasPrefix(rest, "//") {
+ var authority string
+ authority, rest = split(rest[2:], "/", false)
+ url.User, url.Host, err = parseAuthority(authority)
+ if err != nil {
+ goto Error
+ }
+ if strings.Contains(url.Host, "%") {
+ err = errors.New("hexadecimal escape in host")
+ goto Error
+ }
+ }
+ if url.Path, err = unescape(rest, encodePath); err != nil {
+ goto Error
+ }
+ return url, nil
+
+Error:
+ return nil, &Error{"parse", rawurl, err}
+}
+
+func parseAuthority(authority string) (user *Userinfo, host string, err error) {
+ i := strings.LastIndex(authority, "@")
+ if i < 0 {
+ host = authority
+ return
+ }
+ userinfo, host := authority[:i], authority[i+1:]
+ if strings.Index(userinfo, ":") < 0 {
+ if userinfo, err = unescape(userinfo, encodeUserPassword); err != nil {
+ return
+ }
+ user = User(userinfo)
+ } else {
+ username, password := split(userinfo, ":", true)
+ if username, err = unescape(username, encodeUserPassword); err != nil {
+ return
+ }
+ if password, err = unescape(password, encodeUserPassword); err != nil {
+ return
+ }
+ user = UserPassword(username, password)
+ }
+ return
+}
+
+// String reassembles the URL into a valid URL string.
+// The general form of the result is one of:
+//
+// scheme:opaque
+// scheme://userinfo@host/path?query#fragment
+//
+// If u.Opaque is non-empty, String uses the first form;
+// otherwise it uses the second form.
+//
+// In the second form, the following rules apply:
+// - if u.Scheme is empty, scheme: is omitted.
+// - if u.User is nil, userinfo@ is omitted.
+// - if u.Host is empty, host/ is omitted.
+// - if u.Scheme and u.Host are empty and u.User is nil,
+// the entire scheme://userinfo@host/ is omitted.
+// - if u.Host is non-empty and u.Path begins with a /,
+// the form host/path does not add its own /.
+// - if u.RawQuery is empty, ?query is omitted.
+// - if u.Fragment is empty, #fragment is omitted.
+func (u *URL) String() string {
+ var buf bytes.Buffer
+ if u.Scheme != "" {
+ buf.WriteString(u.Scheme)
+ buf.WriteByte(':')
+ }
+ if u.Opaque != "" {
+ buf.WriteString(u.Opaque)
+ } else {
+ if u.Scheme != "" || u.Host != "" || u.User != nil {
+ buf.WriteString("//")
+ if ui := u.User; ui != nil {
+ buf.WriteString(ui.String())
+ buf.WriteByte('@')
+ }
+ if h := u.Host; h != "" {
+ buf.WriteString(h)
+ }
+ }
+ if u.Path != "" && u.Path[0] != '/' && u.Host != "" {
+ buf.WriteByte('/')
+ }
+ buf.WriteString(escape(u.Path, encodePath))
+ }
+ if u.RawQuery != "" {
+ buf.WriteByte('?')
+ buf.WriteString(u.RawQuery)
+ }
+ if u.Fragment != "" {
+ buf.WriteByte('#')
+ buf.WriteString(escape(u.Fragment, encodeFragment))
+ }
+ return buf.String()
+}
+
+// Values maps a string key to a list of values.
+// It is typically used for query parameters and form values.
+// Unlike in the http.Header map, the keys in a Values map
+// are case-sensitive.
+type Values map[string][]string
+
+// Get gets the first value associated with the given key.
+// If there are no values associated with the key, Get returns
+// the empty string. To access multiple values, use the map
+// directly.
+func (v Values) Get(key string) string {
+ if v == nil {
+ return ""
+ }
+ vs, ok := v[key]
+ if !ok || len(vs) == 0 {
+ return ""
+ }
+ return vs[0]
+}
+
+// Set sets the key to value. It replaces any existing
+// values.
+func (v Values) Set(key, value string) {
+ v[key] = []string{value}
+}
+
+// Add adds the value to key. It appends to any existing
+// values associated with key.
+func (v Values) Add(key, value string) {
+ v[key] = append(v[key], value)
+}
+
+// Del deletes the values associated with key.
+func (v Values) Del(key string) {
+ delete(v, key)
+}
+
+// ParseQuery parses the URL-encoded query string and returns
+// a map listing the values specified for each key.
+// ParseQuery always returns a non-nil map containing all the
+// valid query parameters found; err describes the first decoding error
+// encountered, if any.
+func ParseQuery(query string) (m Values, err error) {
+ m = make(Values)
+ err = parseQuery(m, query)
+ return
+}
+
+func parseQuery(m Values, query string) (err error) {
+ for query != "" {
+ key := query
+ if i := strings.IndexAny(key, "&;"); i >= 0 {
+ key, query = key[:i], key[i+1:]
+ } else {
+ query = ""
+ }
+ if key == "" {
+ continue
+ }
+ value := ""
+ if i := strings.Index(key, "="); i >= 0 {
+ key, value = key[:i], key[i+1:]
+ }
+ key, err1 := QueryUnescape(key)
+ if err1 != nil {
+ if err == nil {
+ err = err1
+ }
+ continue
+ }
+ value, err1 = QueryUnescape(value)
+ if err1 != nil {
+ if err == nil {
+ err = err1
+ }
+ continue
+ }
+ m[key] = append(m[key], value)
+ }
+ return err
+}
+
+// Encode encodes the values into ``URL encoded'' form
+// ("bar=baz&foo=quux") sorted by key.
+func (v Values) Encode() string {
+ if v == nil {
+ return ""
+ }
+ var buf bytes.Buffer
+ keys := make([]string, 0, len(v))
+ for k := range v {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+ for _, k := range keys {
+ vs := v[k]
+ prefix := QueryEscape(k) + "="
+ for _, v := range vs {
+ if buf.Len() > 0 {
+ buf.WriteByte('&')
+ }
+ buf.WriteString(prefix)
+ buf.WriteString(QueryEscape(v))
+ }
+ }
+ return buf.String()
+}
+
+// resolvePath applies special path segments from refs and applies
+// them to base, per RFC 3986.
+func resolvePath(base, ref string) string {
+ var full string
+ if ref == "" {
+ full = base
+ } else if ref[0] != '/' {
+ i := strings.LastIndex(base, "/")
+ full = base[:i+1] + ref
+ } else {
+ full = ref
+ }
+ if full == "" {
+ return ""
+ }
+ var dst []string
+ src := strings.Split(full, "/")
+ for _, elem := range src {
+ switch elem {
+ case ".":
+ // drop
+ case "..":
+ if len(dst) > 0 {
+ dst = dst[:len(dst)-1]
+ }
+ default:
+ dst = append(dst, elem)
+ }
+ }
+ if last := src[len(src)-1]; last == "." || last == ".." {
+ // Add final slash to the joined path.
+ dst = append(dst, "")
+ }
+ return "/" + strings.TrimLeft(strings.Join(dst, "/"), "/")
+}
+
+// IsAbs returns true if the URL is absolute.
+func (u *URL) IsAbs() bool {
+ return u.Scheme != ""
+}
+
+// Parse parses a URL in the context of the receiver. The provided URL
+// may be relative or absolute. Parse returns nil, err on parse
+// failure, otherwise its return value is the same as ResolveReference.
+func (u *URL) Parse(ref string) (*URL, error) {
+ refurl, err := Parse(ref)
+ if err != nil {
+ return nil, err
+ }
+ return u.ResolveReference(refurl), nil
+}
+
+// ResolveReference resolves a URI reference to an absolute URI from
+// an absolute base URI, per RFC 3986 Section 5.2. The URI reference
+// may be relative or absolute. ResolveReference always returns a new
+// URL instance, even if the returned URL is identical to either the
+// base or reference. If ref is an absolute URL, then ResolveReference
+// ignores base and returns a copy of ref.
+func (u *URL) ResolveReference(ref *URL) *URL {
+ url := *ref
+ if ref.Scheme == "" {
+ url.Scheme = u.Scheme
+ }
+ if ref.Scheme != "" || ref.Host != "" || ref.User != nil {
+ // The "absoluteURI" or "net_path" cases.
+ url.Path = resolvePath(ref.Path, "")
+ return &url
+ }
+ if ref.Opaque != "" {
+ url.User = nil
+ url.Host = ""
+ url.Path = ""
+ return &url
+ }
+ if ref.Path == "" {
+ if ref.RawQuery == "" {
+ url.RawQuery = u.RawQuery
+ if ref.Fragment == "" {
+ url.Fragment = u.Fragment
+ }
+ }
+ }
+ // The "abs_path" or "rel_path" cases.
+ url.Host = u.Host
+ url.User = u.User
+ url.Path = resolvePath(u.Path, ref.Path)
+ return &url
+}
+
+// Query parses RawQuery and returns the corresponding values.
+func (u *URL) Query() Values {
+ v, _ := ParseQuery(u.RawQuery)
+ return v
+}
+
+// RequestURI returns the encoded path?query or opaque?query
+// string that would be used in an HTTP request for u.
+func (u *URL) RequestURI() string {
+ result := u.Opaque
+ if result == "" {
+ result = escape(u.Path, encodePath)
+ if result == "" {
+ result = "/"
+ }
+ } else {
+ if strings.HasPrefix(result, "//") {
+ result = u.Scheme + ":" + result
+ }
+ }
+ if u.RawQuery != "" {
+ result += "?" + u.RawQuery
+ }
+ return result
+}