From 5ff4c17907d5b19510a62e08fd8d3b11e62b431d Mon Sep 17 00:00:00 2001 From: Ondřej Surý Date: Tue, 13 Sep 2011 13:13:40 +0200 Subject: Imported Upstream version 60 --- src/pkg/http/sniff.go | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 src/pkg/http/sniff.go (limited to 'src/pkg/http/sniff.go') diff --git a/src/pkg/http/sniff.go b/src/pkg/http/sniff.go new file mode 100644 index 000000000..d60868750 --- /dev/null +++ b/src/pkg/http/sniff.go @@ -0,0 +1,214 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package http + +import ( + "bytes" + "encoding/binary" +) + +// Content-type sniffing algorithm. +// References in this file refer to this draft specification: +// http://tools.ietf.org/html/draft-ietf-websec-mime-sniff-03 + +// The algorithm prefers to use sniffLen bytes to make its decision. +const sniffLen = 512 + +// DetectContentType returns the sniffed Content-Type string +// for the given data. This function always returns a valid MIME type. +func DetectContentType(data []byte) string { + if len(data) > sniffLen { + data = data[:sniffLen] + } + + // Index of the first non-whitespace byte in data. + firstNonWS := 0 + for ; firstNonWS < len(data) && isWS(data[firstNonWS]); firstNonWS++ { + } + + for _, sig := range sniffSignatures { + if ct := sig.match(data, firstNonWS); ct != "" { + return ct + } + } + + return "application/octet-stream" // fallback +} + +func isWS(b byte) bool { + return bytes.IndexByte([]byte("\t\n\x0C\n "), b) != -1 +} + +type sniffSig interface { + // match returns the MIME type of the data, or "" if unknown. + match(data []byte, firstNonWS int) string +} + +// Data matching the table in section 6. +var sniffSignatures = []sniffSig{ + htmlSig([]byte("' { + return "" + } + return "text/html; charset=utf-8" +} + +type mp4Sig int + +func (mp4Sig) match(data []byte, firstNonWS int) string { + // c.f. section 6.1. + if len(data) < 8 { + return "" + } + boxSize := int(binary.BigEndian.Uint32(data[:4])) + if boxSize%4 != 0 || len(data) < boxSize { + return "" + } + if !bytes.Equal(data[4:8], []byte("ftyp")) { + return "" + } + for st := 8; st < boxSize; st += 4 { + if st == 12 { + // minor version number + continue + } + seg := string(data[st : st+3]) + switch seg { + case "mp4", "iso", "M4V", "M4P", "M4B": + return "video/mp4" + /* The remainder are not in the spec. + case "M4A": + return "audio/mp4" + case "3gp": + return "video/3gpp" + case "jp2": + return "image/jp2" // JPEG 2000 + */ + } + } + return "" +} + +type textSig int + +func (textSig) match(data []byte, firstNonWS int) string { + // c.f. section 5, step 4. + for _, b := range data[firstNonWS:] { + switch { + case 0x00 <= b && b <= 0x08, + b == 0x0B, + 0x0E <= b && b <= 0x1A, + 0x1C <= b && b <= 0x1F: + return "" + } + } + return "text/plain; charset=utf-8" +} -- cgit v1.2.3