diff options
Diffstat (limited to 'src/mime/mediatype.go')
-rw-r--r-- | src/mime/mediatype.go | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/src/mime/mediatype.go b/src/mime/mediatype.go new file mode 100644 index 000000000..ad63f9bb9 --- /dev/null +++ b/src/mime/mediatype.go @@ -0,0 +1,358 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package mime + +import ( + "bytes" + "errors" + "fmt" + "sort" + "strings" + "unicode" +) + +// FormatMediaType serializes mediatype t and the parameters +// param as a media type conforming to RFC 2045 and RFC 2616. +// The type and parameter names are written in lower-case. +// When any of the arguments result in a standard violation then +// FormatMediaType returns the empty string. +func FormatMediaType(t string, param map[string]string) string { + slash := strings.Index(t, "/") + if slash == -1 { + return "" + } + major, sub := t[:slash], t[slash+1:] + if !isToken(major) || !isToken(sub) { + return "" + } + var b bytes.Buffer + b.WriteString(strings.ToLower(major)) + b.WriteByte('/') + b.WriteString(strings.ToLower(sub)) + + attrs := make([]string, 0, len(param)) + for a := range param { + attrs = append(attrs, a) + } + sort.Strings(attrs) + + for _, attribute := range attrs { + value := param[attribute] + b.WriteByte(';') + b.WriteByte(' ') + if !isToken(attribute) { + return "" + } + b.WriteString(strings.ToLower(attribute)) + b.WriteByte('=') + if isToken(value) { + b.WriteString(value) + continue + } + + b.WriteByte('"') + offset := 0 + for index, character := range value { + if character == '"' || character == '\\' { + b.WriteString(value[offset:index]) + offset = index + b.WriteByte('\\') + } + if character&0x80 != 0 { + return "" + } + } + b.WriteString(value[offset:]) + b.WriteByte('"') + } + return b.String() +} + +func checkMediaTypeDisposition(s string) error { + typ, rest := consumeToken(s) + if typ == "" { + return errors.New("mime: no media type") + } + if rest == "" { + return nil + } + if !strings.HasPrefix(rest, "/") { + return errors.New("mime: expected slash after first token") + } + subtype, rest := consumeToken(rest[1:]) + if subtype == "" { + return errors.New("mime: expected token after slash") + } + if rest != "" { + return errors.New("mime: unexpected content after media subtype") + } + return nil +} + +// ParseMediaType parses a media type value and any optional +// parameters, per RFC 1521. Media types are the values in +// Content-Type and Content-Disposition headers (RFC 2183). +// On success, ParseMediaType returns the media type converted +// to lowercase and trimmed of white space and a non-nil map. +// The returned map, params, maps from the lowercase +// attribute to the attribute value with its case preserved. +func ParseMediaType(v string) (mediatype string, params map[string]string, err error) { + i := strings.Index(v, ";") + if i == -1 { + i = len(v) + } + mediatype = strings.TrimSpace(strings.ToLower(v[0:i])) + + err = checkMediaTypeDisposition(mediatype) + if err != nil { + return "", nil, err + } + + params = make(map[string]string) + + // Map of base parameter name -> parameter name -> value + // for parameters containing a '*' character. + // Lazily initialized. + var continuation map[string]map[string]string + + v = v[i:] + for len(v) > 0 { + v = strings.TrimLeftFunc(v, unicode.IsSpace) + if len(v) == 0 { + break + } + key, value, rest := consumeMediaParam(v) + if key == "" { + if strings.TrimSpace(rest) == ";" { + // Ignore trailing semicolons. + // Not an error. + return + } + // Parse error. + return "", nil, errors.New("mime: invalid media parameter") + } + + pmap := params + if idx := strings.Index(key, "*"); idx != -1 { + baseName := key[:idx] + if continuation == nil { + continuation = make(map[string]map[string]string) + } + var ok bool + if pmap, ok = continuation[baseName]; !ok { + continuation[baseName] = make(map[string]string) + pmap = continuation[baseName] + } + } + if _, exists := pmap[key]; exists { + // Duplicate parameter name is bogus. + return "", nil, errors.New("mime: duplicate parameter name") + } + pmap[key] = value + v = rest + } + + // Stitch together any continuations or things with stars + // (i.e. RFC 2231 things with stars: "foo*0" or "foo*") + var buf bytes.Buffer + for key, pieceMap := range continuation { + singlePartKey := key + "*" + if v, ok := pieceMap[singlePartKey]; ok { + decv := decode2231Enc(v) + params[key] = decv + continue + } + + buf.Reset() + valid := false + for n := 0; ; n++ { + simplePart := fmt.Sprintf("%s*%d", key, n) + if v, ok := pieceMap[simplePart]; ok { + valid = true + buf.WriteString(v) + continue + } + encodedPart := simplePart + "*" + if v, ok := pieceMap[encodedPart]; ok { + valid = true + if n == 0 { + buf.WriteString(decode2231Enc(v)) + } else { + decv, _ := percentHexUnescape(v) + buf.WriteString(decv) + } + } else { + break + } + } + if valid { + params[key] = buf.String() + } + } + + return +} + +func decode2231Enc(v string) string { + sv := strings.SplitN(v, "'", 3) + if len(sv) != 3 { + return "" + } + // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll + // need to decide how to expose it in the API. But I'm not sure + // anybody uses it in practice. + charset := strings.ToLower(sv[0]) + if charset != "us-ascii" && charset != "utf-8" { + // TODO: unsupported encoding + return "" + } + encv, _ := percentHexUnescape(sv[2]) + return encv +} + +func isNotTokenChar(r rune) bool { + return !isTokenChar(r) +} + +// consumeToken consumes a token from the beginning of provided +// string, per RFC 2045 section 5.1 (referenced from 2183), and return +// the token consumed and the rest of the string. Returns ("", v) on +// failure to consume at least one character. +func consumeToken(v string) (token, rest string) { + notPos := strings.IndexFunc(v, isNotTokenChar) + if notPos == -1 { + return v, "" + } + if notPos == 0 { + return "", v + } + return v[0:notPos], v[notPos:] +} + +// consumeValue consumes a "value" per RFC 2045, where a value is +// either a 'token' or a 'quoted-string'. On success, consumeValue +// returns the value consumed (and de-quoted/escaped, if a +// quoted-string) and the rest of the string. On failure, returns +// ("", v). +func consumeValue(v string) (value, rest string) { + if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) { + return consumeToken(v) + } + + leadQuote := rune(v[0]) + + // parse a quoted-string + rest = v[1:] // consume the leading quote + buffer := new(bytes.Buffer) + var idx int + var r rune + var nextIsLiteral bool + for idx, r = range rest { + switch { + case nextIsLiteral: + buffer.WriteRune(r) + nextIsLiteral = false + case r == leadQuote: + return buffer.String(), rest[idx+1:] + case r == '\\': + nextIsLiteral = true + case r != '\r' && r != '\n': + buffer.WriteRune(r) + default: + return "", v + } + } + return "", v +} + +func consumeMediaParam(v string) (param, value, rest string) { + rest = strings.TrimLeftFunc(v, unicode.IsSpace) + if !strings.HasPrefix(rest, ";") { + return "", "", v + } + + rest = rest[1:] // consume semicolon + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + param, rest = consumeToken(rest) + param = strings.ToLower(param) + if param == "" { + return "", "", v + } + + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + if !strings.HasPrefix(rest, "=") { + return "", "", v + } + rest = rest[1:] // consume equals sign + rest = strings.TrimLeftFunc(rest, unicode.IsSpace) + value, rest = consumeValue(rest) + if value == "" { + return "", "", v + } + return param, value, rest +} + +func percentHexUnescape(s string) (string, error) { + // Count %, check that they're well-formed. + percents := 0 + for i := 0; i < len(s); { + if s[i] != '%' { + i++ + continue + } + percents++ + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + s = s[i:] + if len(s) > 3 { + s = s[0:3] + } + return "", fmt.Errorf("mime: bogus characters after %%: %q", s) + } + i += 3 + } + if percents == 0 { + return s, nil + } + + t := make([]byte, len(s)-2*percents) + j := 0 + for i := 0; i < len(s); { + switch s[i] { + case '%': + t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) + j++ + i += 3 + default: + t[j] = s[i] + j++ + i++ + } + } + return string(t), nil +} + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} + +func unhex(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 0 +} |