summary refs log tree commit diff
path: root/src/pkg/http/url.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/http/url.go')
-rw-r--r-- src/pkg/http/url.go 608
1 files changed, 0 insertions, 608 deletions
diff --git a/src/pkg/http/url.go b/src/pkg/http/url.go
deleted file mode 100644
index e934b27c4..000000000
--- a/src/pkg/http/url.go
+++ /dev/null
@@ -1,608 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Parse URLs (actually URIs, but that seems overly pedantic).
-// RFC 3986
-
-package http
-
-import (
- "os"
- "strconv"
- "strings"
-)
-
-// URLError reports an error and the operation and URL that caused it.
-type URLError struct {
- Op string
- URL string
- Error os.Error
-}
-
-func (e *URLError) String() string { return e.Op + " " + e.URL + ": " + e.Error.String() }
-
// ishex reports whether c is an ASCII hexadecimal digit:
// 0-9, a-f or A-F.
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
-
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. Any byte that is not a hexadecimal digit yields 0, so
// callers that care must validate with ishex first.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
-
// encoding selects which part of a URL is being escaped or unescaped.
// The set of characters that may appear unescaped differs per part.
type encoding int

// The modes are numbered from 1, so the zero value of encoding does
// not name any mode.
const (
	// encodePath is used for the path.
	encodePath encoding = iota + 1
	// encodeUserPassword is used for the user and password halves of userinfo.
	encodeUserPassword
	// encodeQueryComponent is used for an individual component of the query.
	encodeQueryComponent
	// encodeFragment is used for the fragment.
	encodeFragment
	// encodeOpaque is used for an opaque path.
	encodeOpaque
)
-
-
// URLEscapeError is the error reported for a malformed percent-escape.
// Its value is the offending text.
type URLEscapeError string

// String returns the error message with the offending text quoted.
func (e URLEscapeError) String() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}
-
-// Return true if the specified character should be escaped when
-// appearing in a URL string, according to RFC 2396.
-// When 'all' is true the full range of reserved characters are matched.
-func shouldEscape(c byte, mode encoding) bool {
- // RFC 2396 §2.3 Unreserved characters (alphanum)
- if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' {
- return false
- }
- switch c {
- case '-', '_', '.', '!', '~', '*', '\'', '(', ')': // §2.3 Unreserved characters (mark)
- return false
-
- case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved)
- // Different sections of the URL allow a few of
- // the reserved characters to appear unescaped.
- switch mode {
- case encodePath: // §3.3
- // The RFC allows : @ & = + $ , but saves / ; for assigning
- // meaning to individual path segments. This package
- // only manipulates the path as a whole, so we allow those
- // last two as well. Clients that need to distinguish between
- // `/foo;y=z/bar` and `/foo%3by=z/bar` will have to re-decode RawPath.
- // That leaves only ? to escape.
- return c == '?'
-
- case encodeUserPassword: // §3.2.2
- // The RFC allows ; : & = + $ , in userinfo, so we must escape only @ and /.
- // The parsing of userinfo treats : as special so we must escape that too.
- return c == '@' || c == '/' || c == ':'
-
- case encodeQueryComponent: // §3.4
- // The RFC reserves (so we must escape) everything.
- return true
-
- case encodeFragment: // §4.1
- // The RFC text is silent but the grammar allows
- // everything, so escape nothing.
- return false
-
- case encodeOpaque: // §3 opaque_part
- // The RFC allows opaque_part to use all characters
- // except that the leading / must be escaped.
- // (We implement that case in String.)
- return false
- }
- }
-
- // Everything else must be escaped.
- return true
-}
-
-
-// URLUnescape unescapes a string in ``URL encoded'' form,
-// converting %AB into the byte 0xAB and '+' into ' ' (space).
-// It returns an error if any % is not followed
-// by two hexadecimal digits.
-// Despite the name, this encoding applies only to individual
-// components of the query portion of the URL.
-func URLUnescape(s string) (string, os.Error) {
- return urlUnescape(s, encodeQueryComponent)
-}
-
-// urlUnescape is like URLUnescape but mode specifies
-// which section of the URL is being unescaped.
-func urlUnescape(s string, mode encoding) (string, os.Error) {
- // Count %, check that they're well-formed.
- n := 0
- hasPlus := false
- for i := 0; i < len(s); {
- switch s[i] {
- case '%':
- n++
- if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
- s = s[i:]
- if len(s) > 3 {
- s = s[0:3]
- }
- return "", URLEscapeError(s)
- }
- i += 3
- case '+':
- hasPlus = mode == encodeQueryComponent
- i++
- default:
- i++
- }
- }
-
- if n == 0 && !hasPlus {
- return s, nil
- }
-
- t := make([]byte, len(s)-2*n)
- j := 0
- for i := 0; i < len(s); {
- switch s[i] {
- case '%':
- t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
- j++
- i += 3
- case '+':
- if mode == encodeQueryComponent {
- t[j] = ' '
- } else {
- t[j] = '+'
- }
- j++
- i++
- default:
- t[j] = s[i]
- j++
- i++
- }
- }
- return string(t), nil
-}
-
-// URLEscape converts a string into ``URL encoded'' form.
-// Despite the name, this encoding applies only to individual
-// components of the query portion of the URL.
-func URLEscape(s string) string {
- return urlEscape(s, encodeQueryComponent)
-}
-
-func urlEscape(s string, mode encoding) string {
- spaceCount, hexCount := 0, 0
- for i := 0; i < len(s); i++ {
- c := s[i]
- if shouldEscape(c, mode) {
- if c == ' ' && mode == encodeQueryComponent {
- spaceCount++
- } else {
- hexCount++
- }
- }
- }
-
- if spaceCount == 0 && hexCount == 0 {
- return s
- }
-
- t := make([]byte, len(s)+2*hexCount)
- j := 0
- for i := 0; i < len(s); i++ {
- switch c := s[i]; {
- case c == ' ' && mode == encodeQueryComponent:
- t[j] = '+'
- j++
- case shouldEscape(c, mode):
- t[j] = '%'
- t[j+1] = "0123456789ABCDEF"[c>>4]
- t[j+2] = "0123456789ABCDEF"[c&15]
- j += 3
- default:
- t[j] = s[i]
- j++
- }
- }
- return string(t)
-}
-
-// UnescapeUserinfo parses the RawUserinfo field of a URL
-// as the form user or user:password and unescapes and returns
-// the two halves.
-//
-// This functionality should only be used with legacy web sites.
-// RFC 2396 warns that interpreting Userinfo this way
-// ``is NOT RECOMMENDED, because the passing of authentication
-// information in clear text (such as URI) has proven to be a
-// security risk in almost every case where it has been used.''
-func UnescapeUserinfo(rawUserinfo string) (user, password string, err os.Error) {
- u, p := split(rawUserinfo, ':', true)
- if user, err = urlUnescape(u, encodeUserPassword); err != nil {
- return "", "", err
- }
- if password, err = urlUnescape(p, encodeUserPassword); err != nil {
- return "", "", err
- }
- return
-}
-
-// EscapeUserinfo combines user and password in the form
-// user:password (or just user if password is empty) and then
-// escapes it for use as the URL.RawUserinfo field.
-//
-// This functionality should only be used with legacy web sites.
-// RFC 2396 warns that interpreting Userinfo this way
-// ``is NOT RECOMMENDED, because the passing of authentication
-// information in clear text (such as URI) has proven to be a
-// security risk in almost every case where it has been used.''
-func EscapeUserinfo(user, password string) string {
- raw := urlEscape(user, encodeUserPassword)
- if password != "" {
- raw += ":" + urlEscape(password, encodeUserPassword)
- }
- return raw
-}
-
// A URL holds the components of a parsed URL (strictly speaking, of a
// URI reference). The general form represented is:
//	scheme://[userinfo@]host/path[?query][#fragment]
//
// The Raw, RawAuthority, RawPath, and RawQuery fields are kept in
// "wire format": special characters are hex-escaped unless they are
// meant to have their special meaning. The Raw values are supplied in
// this form because clients typically have to split them into pieces
// before decoding further.
//
// All other fields are logical values, in which '+' and '%' stand for
// themselves.
type URL struct {
	Raw          string // the original string
	Scheme       string // scheme
	RawAuthority string // [userinfo@]host
	RawUserinfo  string // userinfo
	Host         string // host
	RawPath      string // /path[?query][#fragment]
	Path         string // /path
	OpaquePath   bool   // path is opaque (unrooted when scheme is present)
	RawQuery     string // query
	Fragment     string // fragment
}
-
-// Maybe rawurl is of the form scheme:path.
-// (Scheme must be [a-zA-Z][a-zA-Z0-9+-.]*)
-// If so, return scheme, path; else return "", rawurl.
-func getscheme(rawurl string) (scheme, path string, err os.Error) {
- for i := 0; i < len(rawurl); i++ {
- c := rawurl[i]
- switch {
- case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
- // do nothing
- case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.':
- if i == 0 {
- return "", rawurl, nil
- }
- case c == ':':
- if i == 0 {
- return "", "", os.NewError("missing protocol scheme")
- }
- return rawurl[0:i], rawurl[i+1:], nil
- default:
- // we have encountered an invalid character,
- // so there is no valid scheme
- return "", rawurl, nil
- }
- }
- return "", rawurl, nil
-}
-
// split cuts s at the first occurrence of the byte c.
// If c occurs in s, split returns the text before c and the text from
// c onward; when cutc is true the second result starts after c
// instead of at it.
// If c does not occur in s, split returns s and "".
func split(s string, c byte, cutc bool) (string, string) {
	at := 0
	for at < len(s) && s[at] != c {
		at++
	}
	if at == len(s) {
		return s, ""
	}

	tail := s[at:]
	if cutc {
		tail = tail[1:]
	}
	return s[:at], tail
}
-
-// ParseURL parses rawurl into a URL structure.
-// The string rawurl is assumed not to have a #fragment suffix.
-// (Web browsers strip #fragment before sending the URL to a web server.)
-// The rawurl may be relative or absolute.
-func ParseURL(rawurl string) (url *URL, err os.Error) {
- return parseURL(rawurl, false)
-}
-
-// ParseRequestURL parses rawurl into a URL structure. It assumes that
-// rawurl was received from an HTTP request, so the rawurl is interpreted
-// only as an absolute URI or an absolute path.
-// The string rawurl is assumed not to have a #fragment suffix.
-// (Web browsers strip #fragment before sending the URL to a web server.)
-func ParseRequestURL(rawurl string) (url *URL, err os.Error) {
- return parseURL(rawurl, true)
-}
-
-// parseURL parses a URL from a string in one of two contexts. If
-// viaRequest is true, the URL is assumed to have arrived via an HTTP request,
-// in which case only absolute URLs or path-absolute relative URLs are allowed.
-// If viaRequest is false, all forms of relative URLs are allowed.
-func parseURL(rawurl string, viaRequest bool) (url *URL, err os.Error) {
- var (
- leadingSlash bool
- path string
- )
-
- if rawurl == "" {
- err = os.NewError("empty url")
- goto Error
- }
- url = new(URL)
- url.Raw = rawurl
-
- // Split off possible leading "http:", "mailto:", etc.
- // Cannot contain escaped characters.
- if url.Scheme, path, err = getscheme(rawurl); err != nil {
- goto Error
- }
- leadingSlash = strings.HasPrefix(path, "/")
-
- if url.Scheme != "" && !leadingSlash {
- // RFC 2396:
- // Absolute URI (has scheme) with non-rooted path
- // is uninterpreted. It doesn't even have a ?query.
- // This is the case that handles mailto:name@example.com.
- url.RawPath = path
-
- if url.Path, err = urlUnescape(path, encodeOpaque); err != nil {
- goto Error
- }
- url.OpaquePath = true
- } else {
- if viaRequest && !leadingSlash {
- err = os.NewError("invalid URI for request")
- goto Error
- }
-
- // Split off query before parsing path further.
- url.RawPath = path
- path, query := split(path, '?', false)
- if len(query) > 1 {
- url.RawQuery = query[1:]
- }
-
- // Maybe path is //authority/path
- if (url.Scheme != "" || !viaRequest) &&
- strings.HasPrefix(path, "//") && !strings.HasPrefix(path, "///") {
- url.RawAuthority, path = split(path[2:], '/', false)
- url.RawPath = url.RawPath[2+len(url.RawAuthority):]
- }
-
- // Split authority into userinfo@host.
- // If there's no @, split's default is wrong. Check explicitly.
- var rawHost string
- if strings.Index(url.RawAuthority, "@") < 0 {
- rawHost = url.RawAuthority
- } else {
- url.RawUserinfo, rawHost = split(url.RawAuthority, '@', true)
- }
-
- // We leave RawAuthority only in raw form because clients
- // of common protocols should be using Userinfo and Host
- // instead. Clients that wish to use RawAuthority will have to
- // interpret it themselves: RFC 2396 does not define the meaning.
-
- if strings.Contains(rawHost, "%") {
- // Host cannot contain escaped characters.
- err = os.NewError("hexadecimal escape in host")
- goto Error
- }
- url.Host = rawHost
-
- if url.Path, err = urlUnescape(path, encodePath); err != nil {
- goto Error
- }
- }
- return url, nil
-
-Error:
- return nil, &URLError{"parse", rawurl, err}
-
-}
-
-// ParseURLReference is like ParseURL but allows a trailing #fragment.
-func ParseURLReference(rawurlref string) (url *URL, err os.Error) {
- // Cut off #frag.
- rawurl, frag := split(rawurlref, '#', false)
- if url, err = ParseURL(rawurl); err != nil {
- return nil, err
- }
- url.Raw += frag
- url.RawPath += frag
- if len(frag) > 1 {
- frag = frag[1:]
- if url.Fragment, err = urlUnescape(frag, encodeFragment); err != nil {
- return nil, &URLError{"parse", rawurl, err}
- }
- }
- return url, nil
-}
-
-// String reassembles url into a valid URL string.
-//
-// There are redundant fields stored in the URL structure:
-// the String method consults Scheme, Path, Host, RawUserinfo,
-// RawQuery, and Fragment, but not Raw, RawPath or RawAuthority.
-func (url *URL) String() string {
- result := ""
- if url.Scheme != "" {
- result += url.Scheme + ":"
- }
- if url.Host != "" || url.RawUserinfo != "" {
- result += "//"
- if url.RawUserinfo != "" {
- // hide the password, if any
- info := url.RawUserinfo
- if i := strings.Index(info, ":"); i >= 0 {
- info = info[0:i] + ":******"
- }
- result += info + "@"
- }
- result += url.Host
- }
- if url.OpaquePath {
- path := url.Path
- if strings.HasPrefix(path, "/") {
- result += "%2f"
- path = path[1:]
- }
- result += urlEscape(path, encodeOpaque)
- } else {
- result += urlEscape(url.Path, encodePath)
- }
- if url.RawQuery != "" {
- result += "?" + url.RawQuery
- }
- if url.Fragment != "" {
- result += "#" + urlEscape(url.Fragment, encodeFragment)
- }
- return result
-}
-
-// Encode encodes the values into ``URL encoded'' form.
-// e.g. "foo=bar&bar=baz"
-func (v Values) Encode() string {
- if v == nil {
- return ""
- }
- parts := make([]string, 0, len(v)) // will be large enough for most uses
- for k, vs := range v {
- prefix := URLEscape(k) + "="
- for _, v := range vs {
- parts = append(parts, prefix+URLEscape(v))
- }
- }
- return strings.Join(parts, "&")
-}
-
// resolvePath resolves the path refpath against basepath, per RFC 2396,
// and returns the combined path.
//
// Both paths are split on "/". The segments of refpath are then
// applied to the base segments one at a time:
//   - "." empties the last base segment;
//   - ".." drops the last base segment (never going below one segment)
//     and empties the new last one;
//   - any other segment replaces the last base segment if it is the
//     first reference segment or the last base segment is empty, and
//     is appended otherwise.
//
// The result is not guaranteed to start with "/"; callers that need a
// rooted path must add it themselves.
func resolvePath(basepath string, refpath string) string {
	// strings.Split with a non-empty separator always returns at least
	// one element, so base is never empty and indexing its last
	// element below is safe.
	base := strings.Split(basepath, "/")
	refs := strings.Split(refpath, "/")

	for idx, ref := range refs {
		last := len(base) - 1
		switch ref {
		case ".":
			base[last] = ""
		case "..":
			// Drop one segment, but always keep at least one.
			if last > 0 {
				base = base[:last]
			}
			base[len(base)-1] = ""
		default:
			if idx == 0 || base[last] == "" {
				base[last] = ref
			} else {
				base = append(base, ref)
			}
		}
	}
	return strings.Join(base, "/")
}
-
-// IsAbs returns true if the URL is absolute.
-func (url *URL) IsAbs() bool {
- return url.Scheme != ""
-}
-
-// ParseURL parses a URL in the context of a base URL. The URL in ref
-// may be relative or absolute. ParseURL returns nil, err on parse
-// failure, otherwise its return value is the same as ResolveReference.
-func (base *URL) ParseURL(ref string) (*URL, os.Error) {
- refurl, err := ParseURL(ref)
- if err != nil {
- return nil, err
- }
- return base.ResolveReference(refurl), nil
-}
-
-// ResolveReference resolves a URI reference to an absolute URI from
-// an absolute base URI, per RFC 2396 Section 5.2. The URI reference
-// may be relative or absolute. ResolveReference always returns a new
-// URL instance, even if the returned URL is identical to either the
-// base or reference. If ref is an absolute URL, then ResolveReference
-// ignores base and returns a copy of ref.
-func (base *URL) ResolveReference(ref *URL) *URL {
- url := new(URL)
- switch {
- case ref.IsAbs():
- *url = *ref
- default:
- // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
- *url = *base
- if ref.RawAuthority != "" {
- // The "net_path" case.
- url.RawAuthority = ref.RawAuthority
- url.Host = ref.Host
- url.RawUserinfo = ref.RawUserinfo
- }
- switch {
- case url.OpaquePath:
- url.Path = ref.Path
- url.RawPath = ref.RawPath
- url.RawQuery = ref.RawQuery
- case strings.HasPrefix(ref.Path, "/"):
- // The "abs_path" case.
- url.Path = ref.Path
- url.RawPath = ref.RawPath
- url.RawQuery = ref.RawQuery
- default:
- // The "rel_path" case.
- path := resolvePath(base.Path, ref.Path)
- if !strings.HasPrefix(path, "/") {
- path = "/" + path
- }
- url.Path = path
- url.RawPath = url.Path
- url.RawQuery = ref.RawQuery
- if ref.RawQuery != "" {
- url.RawPath += "?" + url.RawQuery
- }
- }
-
- url.Fragment = ref.Fragment
- }
- url.Raw = url.String()
- return url
-}
-
-// Query parses RawQuery and returns the corresponding values.
-func (u *URL) Query() Values {
- v, _ := ParseQuery(u.RawQuery)
- return v
-}