diff options
Diffstat (limited to 'src/net/mail')
-rw-r--r-- | src/net/mail/message.go | 556 | ||||
-rw-r--r-- | src/net/mail/message_test.go | 314 |
2 files changed, 870 insertions, 0 deletions
// src/net/mail/message.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package mail implements parsing of mail messages.

For the most part, this package follows the syntax as specified by RFC 5322.
Notable divergences:
	* Obsolete address formats are not parsed, including addresses with
	  embedded route information.
	* Group addresses are not parsed.
	* The full range of spacing (the CFWS syntax element) is not supported,
	  such as breaking addresses across lines.
*/

// debug gates the parser's trace output. It is off by default; set it to
// debugT(true) while debugging the address parser.
var debug = debugT(false)

// debugT is a boolean switch with a Printf method, so that trace calls
// can be left in the code and cost a single branch when disabled.
type debugT bool

// Printf logs through the standard logger when d is true and does
// nothing otherwise.
func (d debugT) Printf(format string, args ...interface{}) {
	if d {
		log.Printf(format, args...)
	}
}

// A Message represents a parsed mail message.
type Message struct {
	Header Header
	Body   io.Reader
}

// ReadMessage reads a message from r.
// The headers are parsed, and the body of the message will be available
// for reading from msg.Body.
func ReadMessage(r io.Reader) (msg *Message, err error) {
	tp := textproto.NewReader(bufio.NewReader(r))

	hdr, err := tp.ReadMIMEHeader()
	if err != nil {
		return nil, err
	}

	return &Message{
		Header: Header(hdr),
		// tp.R is the buffered reader positioned just past the header,
		// so reading from it yields the body.
		Body: tp.R,
	}, nil
}

// Layouts suitable for passing to time.Parse.
// These are tried in order.
var dateLayouts []string

// init fills dateLayouts with every combination of the optional
// date-time elements described by RFC 5322, section 3.3.
func init() {
	dows := [...]string{"", "Mon, "}   // day-of-week
	days := [...]string{"2", "02"}     // day = 1*2DIGIT
	years := [...]string{"2006", "06"} // year = 4*DIGIT / 2*DIGIT
	seconds := [...]string{":05", ""}  // second
	// "-0700 (MST)" is not in RFC 5322, but is common.
	zones := [...]string{"-0700", "MST", "-0700 (MST)"} // zone = (("+" / "-") 4DIGIT) / "GMT" / ...

	for _, dow := range dows {
		for _, day := range days {
			for _, year := range years {
				for _, second := range seconds {
					for _, zone := range zones {
						s := dow + day + " Jan " + year + " 15:04" + second + " " + zone
						dateLayouts = append(dateLayouts, s)
					}
				}
			}
		}
	}
}

// parseDate tries each layout in dateLayouts in order and returns the
// first successful parse. It returns an error if no layout matches.
func parseDate(date string) (time.Time, error) {
	for _, layout := range dateLayouts {
		t, err := time.Parse(layout, date)
		if err == nil {
			return t, nil
		}
	}
	return time.Time{}, errors.New("mail: header could not be parsed")
}

// A Header represents the key-value pairs in a mail message header.
type Header map[string][]string

// Get gets the first value associated with the given key.
// If there are no values associated with the key, Get returns "".
func (h Header) Get(key string) string {
	return textproto.MIMEHeader(h).Get(key)
}

// ErrHeaderNotPresent is returned by Header.Date and Header.AddressList
// when the requested header field is absent or empty.
var ErrHeaderNotPresent = errors.New("mail: header not in message")

// Date parses the Date header field.
func (h Header) Date() (time.Time, error) {
	hdr := h.Get("Date")
	if hdr == "" {
		return time.Time{}, ErrHeaderNotPresent
	}
	return parseDate(hdr)
}

// AddressList parses the named header field as a list of addresses.
func (h Header) AddressList(key string) ([]*Address, error) {
	hdr := h.Get(key)
	if hdr == "" {
		return nil, ErrHeaderNotPresent
	}
	return ParseAddressList(hdr)
}

// Address represents a single mail address.
// An address such as "Barry Gibbs <bg@example.com>" is represented
// as Address{Name: "Barry Gibbs", Address: "bg@example.com"}.
type Address struct {
	Name    string // Proper name; may be empty.
	Address string // user@domain
}

// ParseAddress parses a single RFC 5322 address, e.g. "Barry Gibbs <bg@example.com>".
// It returns an error if anything other than white space follows the address;
// use ParseAddressList for comma-separated lists.
func ParseAddress(address string) (*Address, error) {
	return newAddrParser(address).parseSingleAddress()
}

// ParseAddressList parses the given string as a list of addresses.
func ParseAddressList(list string) ([]*Address, error) {
	return newAddrParser(list).parseAddressList()
}

// String formats the address as a valid RFC 5322 address.
// If the address's name contains non-ASCII characters
// the name will be rendered according to RFC 2047.
func (a *Address) String() string {
	s := "<" + a.Address + ">"
	if a.Name == "" {
		return s
	}
	// If every character is printable ASCII, quoting is simple.
	allPrintable := true
	for i := 0; i < len(a.Name); i++ {
		// isWSP here should actually be isFWS,
		// but we don't support folding yet.
		if !isVchar(a.Name[i]) && !isWSP(a.Name[i]) {
			allPrintable = false
			break
		}
	}
	if allPrintable {
		b := bytes.NewBufferString(`"`)
		for i := 0; i < len(a.Name); i++ {
			// Only backslash and double quote fail both predicates;
			// they must be escaped inside a quoted-string.
			if !isQtext(a.Name[i]) && !isWSP(a.Name[i]) {
				b.WriteByte('\\')
			}
			b.WriteByte(a.Name[i])
		}
		b.WriteString(`" `)
		b.WriteString(s)
		return b.String()
	}

	// UTF-8 "Q" encoding
	b := bytes.NewBufferString("=?utf-8?q?")
	for i := 0; i < len(a.Name); i++ {
		switch c := a.Name[i]; {
		case c == ' ':
			b.WriteByte('_')
		case isVchar(c) && c != '=' && c != '?' && c != '_':
			b.WriteByte(c)
		default:
			fmt.Fprintf(b, "=%02X", c)
		}
	}
	b.WriteString("?= ")
	b.WriteString(s)
	return b.String()
}

// addrParser holds the not-yet-consumed input. The consume* methods
// re-slice it as they make progress.
type addrParser []byte

// newAddrParser returns a parser positioned at the start of s.
func newAddrParser(s string) *addrParser {
	p := addrParser(s)
	return &p
}

// parseAddressList parses one or more comma-separated addresses and
// requires the whole input to be consumed.
func (p *addrParser) parseAddressList() ([]*Address, error) {
	var list []*Address
	for {
		p.skipSpace()
		addr, err := p.parseAddress()
		if err != nil {
			return nil, err
		}
		list = append(list, addr)

		p.skipSpace()
		if p.empty() {
			break
		}
		if !p.consume(',') {
			return nil, errors.New("mail: expected comma")
		}
	}
	return list, nil
}

// parseSingleAddress parses exactly one address and reports an error if
// any non-space input remains afterwards, so that lists or trailing
// garbage are not silently truncated to their first address.
func (p *addrParser) parseSingleAddress() (*Address, error) {
	addr, err := p.parseAddress()
	if err != nil {
		return nil, err
	}
	p.skipSpace()
	if !p.empty() {
		return nil, fmt.Errorf("mail: expected single address, got %q", string(*p))
	}
	return addr, nil
}

// parseAddress parses a single RFC 5322 address at the start of p.
// Input following the address is left unconsumed.
func (p *addrParser) parseAddress() (addr *Address, err error) {
	debug.Printf("parseAddress: %q", *p)
	p.skipSpace()
	if p.empty() {
		return nil, errors.New("mail: no address")
	}

	// address = name-addr / addr-spec
	// TODO(dsymonds): Support parsing group address.

	// addr-spec has a more restricted grammar than name-addr,
	// so try parsing it first, and fallback to name-addr.
	// TODO(dsymonds): Is this really correct?
	spec, err := p.consumeAddrSpec()
	if err == nil {
		return &Address{
			Address: spec,
		}, nil
	}
	debug.Printf("parseAddress: not an addr-spec: %v", err)
	debug.Printf("parseAddress: state is now %q", *p)

	// display-name
	// consumeAddrSpec restored p on failure, so p is still non-empty here.
	var displayName string
	if p.peek() != '<' {
		displayName, err = p.consumePhrase()
		if err != nil {
			return nil, err
		}
	}
	debug.Printf("parseAddress: displayName=%q", displayName)

	// angle-addr = "<" addr-spec ">"
	p.skipSpace()
	if !p.consume('<') {
		return nil, errors.New("mail: no angle-addr")
	}
	spec, err = p.consumeAddrSpec()
	if err != nil {
		return nil, err
	}
	if !p.consume('>') {
		return nil, errors.New("mail: unclosed angle-addr")
	}
	debug.Printf("parseAddress: spec=%q", spec)

	return &Address{
		Name:    displayName,
		Address: spec,
	}, nil
}

// consumeAddrSpec parses a single RFC 5322 addr-spec at the start of p.
// On failure p is restored to its state at entry.
func (p *addrParser) consumeAddrSpec() (spec string, err error) {
	debug.Printf("consumeAddrSpec: %q", *p)

	orig := *p
	defer func() {
		if err != nil {
			*p = orig
		}
	}()

	// local-part = dot-atom / quoted-string
	var localPart string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no addr-spec")
	}
	if p.peek() == '"' {
		// quoted-string
		debug.Printf("consumeAddrSpec: parsing quoted-string")
		localPart, err = p.consumeQuotedString()
	} else {
		// dot-atom
		debug.Printf("consumeAddrSpec: parsing dot-atom")
		localPart, err = p.consumeAtom(true, false)
	}
	if err != nil {
		debug.Printf("consumeAddrSpec: failed: %v", err)
		return "", err
	}

	if !p.consume('@') {
		return "", errors.New("mail: missing @ in addr-spec")
	}

	// domain = dot-atom / domain-literal
	var domain string
	p.skipSpace()
	if p.empty() {
		return "", errors.New("mail: no domain in addr-spec")
	}
	// TODO(dsymonds): Handle domain-literal
	domain, err = p.consumeAtom(true, false)
	if err != nil {
		return "", err
	}

	return localPart + "@" + domain, nil
}

// consumePhrase parses the RFC 5322 phrase at the start of p.
// Words are joined with single spaces; RFC 2047 encoded-words are decoded.
func (p *addrParser) consumePhrase() (phrase string, err error) {
	debug.Printf("consumePhrase: [%s]", *p)
	// phrase = 1*word
	var words []string
	for {
		// word = atom / quoted-string
		var word string
		p.skipSpace()
		if p.empty() {
			return "", errors.New("mail: missing phrase")
		}
		if p.peek() == '"' {
			// quoted-string
			word, err = p.consumeQuotedString()
		} else {
			// atom
			// We actually parse dot-atom here to be more permissive
			// than what RFC 5322 specifies.
			word, err = p.consumeAtom(true, true)
		}

		// RFC 2047 encoded-word starts with =?, ends with ?=, and has two other ?s.
		if err == nil && strings.HasPrefix(word, "=?") && strings.HasSuffix(word, "?=") && strings.Count(word, "?") == 4 {
			word, err = decodeRFC2047Word(word)
		}

		if err != nil {
			break
		}
		debug.Printf("consumePhrase: consumed %q", word)
		words = append(words, word)
	}
	// Ignore any error if we got at least one word.
	if err != nil && len(words) == 0 {
		debug.Printf("consumePhrase: hit err: %v", err)
		return "", fmt.Errorf("mail: missing word in phrase: %v", err)
	}
	phrase = strings.Join(words, " ")
	return phrase, nil
}

// consumeQuotedString parses the quoted string at the start of p.
// The caller must ensure that the first byte of p is '"'.
func (p *addrParser) consumeQuotedString() (qs string, err error) {
	// Assume first byte is '"'.
	i := 1
	qsb := make([]byte, 0, 10)
Loop:
	for {
		if i >= p.len() {
			return "", errors.New("mail: unclosed quoted-string")
		}
		switch c := (*p)[i]; {
		case c == '"':
			break Loop
		case c == '\\':
			if i+1 == p.len() {
				return "", errors.New("mail: unclosed quoted-string")
			}
			// quoted-pair: take the following byte literally.
			qsb = append(qsb, (*p)[i+1])
			i += 2
		case isQtext(c), c == ' ' || c == '\t':
			// qtext (printable US-ASCII excluding " and \), or
			// FWS (almost; we're ignoring CRLF)
			qsb = append(qsb, c)
			i++
		default:
			return "", fmt.Errorf("mail: bad character in quoted-string: %q", c)
		}
	}
	*p = (*p)[i+1:]
	return string(qsb), nil
}

// consumeAtom parses an RFC 5322 atom at the start of p.
// If dot is true, consumeAtom parses an RFC 5322 dot-atom instead.
// If permissive is true, consumeAtom does not reject consecutive dots or
// a trailing dot; this is used for display names such as "Asem H.".
// The atom is consumed from p even when a dot error is returned, so
// callers that need to backtrack must restore p themselves.
func (p *addrParser) consumeAtom(dot bool, permissive bool) (atom string, err error) {
	if p.empty() || !isAtext(p.peek(), false) {
		return "", errors.New("mail: invalid string")
	}
	i := 1
	for ; i < p.len() && isAtext((*p)[i], dot); i++ {
	}
	atom, *p = string((*p)[:i]), (*p)[i:]
	if !permissive {
		// dot-atom-text = 1*atext *("." 1*atext): every dot must be
		// surrounded by atext. A leading dot is already excluded above.
		if strings.Contains(atom, "..") {
			return "", errors.New("mail: double dot in atom")
		}
		if strings.HasSuffix(atom, ".") {
			return "", errors.New("mail: trailing dot in atom")
		}
	}
	return atom, nil
}

// consume removes c from the front of p and reports whether it did so.
func (p *addrParser) consume(c byte) bool {
	if p.empty() || p.peek() != c {
		return false
	}
	*p = (*p)[1:]
	return true
}

// skipSpace skips the leading space and tab characters.
func (p *addrParser) skipSpace() {
	*p = bytes.TrimLeft(*p, " \t")
}

// peek returns the first unconsumed byte. It panics if p is empty.
func (p *addrParser) peek() byte {
	return (*p)[0]
}

// empty reports whether all input has been consumed.
func (p *addrParser) empty() bool {
	return p.len() == 0
}

// len returns the number of unconsumed bytes.
func (p *addrParser) len() int {
	return len(*p)
}

// decodeRFC2047Word decodes a single RFC 2047 encoded-word of the form
// =?charset?encoding?text?= into a UTF-8 string. Only the us-ascii,
// iso-8859-1 and utf-8 charsets and the "b" and "q" encodings are supported.
func decodeRFC2047Word(s string) (string, error) {
	fields := strings.Split(s, "?")
	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" {
		return "", errors.New("address not RFC 2047 encoded")
	}
	charset, enc := strings.ToLower(fields[1]), strings.ToLower(fields[2])
	if charset != "us-ascii" && charset != "iso-8859-1" && charset != "utf-8" {
		return "", fmt.Errorf("charset not supported: %q", charset)
	}

	in := bytes.NewBufferString(fields[3])
	var r io.Reader
	switch enc {
	case "b":
		r = base64.NewDecoder(base64.StdEncoding, in)
	case "q":
		r = qDecoder{r: in}
	default:
		return "", fmt.Errorf("RFC 2047 encoding not supported: %q", enc)
	}

	dec, err := ioutil.ReadAll(r)
	if err != nil {
		return "", err
	}

	switch charset {
	case "us-ascii":
		// Bytes outside the 7-bit range are not valid US-ASCII.
		b := new(bytes.Buffer)
		for _, c := range dec {
			if c >= 0x80 {
				b.WriteRune(unicode.ReplacementChar)
			} else {
				b.WriteRune(rune(c))
			}
		}
		return b.String(), nil
	case "iso-8859-1":
		// Each ISO-8859-1 byte has the same numeric value as its Unicode code point.
		b := new(bytes.Buffer)
		for _, c := range dec {
			b.WriteRune(rune(c))
		}
		return b.String(), nil
	case "utf-8":
		return string(dec), nil
	}
	panic("unreachable")
}

// qDecoder is an io.Reader that decodes the RFC 2047 "Q" encoding
// read from r.
type qDecoder struct {
	r       io.Reader
	scratch [2]byte
}

// Read decodes at most one byte into p per call: "=XX" becomes the byte
// with hexadecimal value XX, "_" becomes a space, and any other byte is
// passed through unchanged.
func (qd qDecoder) Read(p []byte) (n int, err error) {
	// This method writes at most one byte into p.
	if len(p) == 0 {
		return 0, nil
	}
	if _, err := qd.r.Read(qd.scratch[:1]); err != nil {
		return 0, err
	}
	switch c := qd.scratch[0]; {
	case c == '=':
		if _, err := io.ReadFull(qd.r, qd.scratch[:2]); err != nil {
			return 0, err
		}
		// ParseUint (rather than ParseInt) rejects a leading sign, so
		// malformed escapes such as "=-1" are reported, not decoded.
		x, err := strconv.ParseUint(string(qd.scratch[:2]), 16, 8)
		if err != nil {
			return 0, fmt.Errorf("mail: invalid RFC 2047 encoding: %q", qd.scratch[:2])
		}
		p[0] = byte(x)
	case c == '_':
		p[0] = ' '
	default:
		p[0] = c
	}
	return 1, nil
}

// atextChars lists the bytes that make up the RFC 5322 atext class.
var atextChars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"abcdefghijklmnopqrstuvwxyz" +
	"0123456789" +
	"!#$%&'*+-/=?^_`{|}~")

// isAtext returns true if c is an RFC 5322 atext character.
// If dot is true, period is included.
func isAtext(c byte, dot bool) bool {
	if dot && c == '.' {
		return true
	}
	return bytes.IndexByte(atextChars, c) >= 0
}

// isQtext returns true if c is an RFC 5322 qtext character.
func isQtext(c byte) bool {
	// Printable US-ASCII, excluding backslash or quote.
	if c == '\\' || c == '"' {
		return false
	}
	return '!' <= c && c <= '~'
}

// isVchar returns true if c is an RFC 5322 VCHAR character.
func isVchar(c byte) bool {
	// Visible (printing) characters.
	return '!' <= c && c <= '~'
}

// isWSP returns true if c is a WSP (white space).
// WSP is a space or horizontal tab (RFC5234 Appendix B).
func isWSP(c byte) bool {
	return c == ' ' || c == '\t'
}

// src/net/mail/message_test.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
+ +package mail + +import ( + "bytes" + "io/ioutil" + "reflect" + "strings" + "testing" + "time" +) + +var parseTests = []struct { + in string + header Header + body string +}{ + { + // RFC 5322, Appendix A.1.1 + in: `From: John Doe <jdoe@machine.example> +To: Mary Smith <mary@example.net> +Subject: Saying Hello +Date: Fri, 21 Nov 1997 09:55:06 -0600 +Message-ID: <1234@local.machine.example> + +This is a message just to say hello. +So, "Hello". +`, + header: Header{ + "From": []string{"John Doe <jdoe@machine.example>"}, + "To": []string{"Mary Smith <mary@example.net>"}, + "Subject": []string{"Saying Hello"}, + "Date": []string{"Fri, 21 Nov 1997 09:55:06 -0600"}, + "Message-Id": []string{"<1234@local.machine.example>"}, + }, + body: "This is a message just to say hello.\nSo, \"Hello\".\n", + }, +} + +func TestParsing(t *testing.T) { + for i, test := range parseTests { + msg, err := ReadMessage(bytes.NewBuffer([]byte(test.in))) + if err != nil { + t.Errorf("test #%d: Failed parsing message: %v", i, err) + continue + } + if !headerEq(msg.Header, test.header) { + t.Errorf("test #%d: Incorrectly parsed message header.\nGot:\n%+v\nWant:\n%+v", + i, msg.Header, test.header) + } + body, err := ioutil.ReadAll(msg.Body) + if err != nil { + t.Errorf("test #%d: Failed reading body: %v", i, err) + continue + } + bodyStr := string(body) + if bodyStr != test.body { + t.Errorf("test #%d: Incorrectly parsed message body.\nGot:\n%+v\nWant:\n%+v", + i, bodyStr, test.body) + } + } +} + +func headerEq(a, b Header) bool { + if len(a) != len(b) { + return false + } + for k, as := range a { + bs, ok := b[k] + if !ok { + return false + } + if !reflect.DeepEqual(as, bs) { + return false + } + } + return true +} + +func TestDateParsing(t *testing.T) { + tests := []struct { + dateStr string + exp time.Time + }{ + // RFC 5322, Appendix A.1.1 + { + "Fri, 21 Nov 1997 09:55:06 -0600", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + }, + // RFC5322, Appendix A.6.2 + // 
Obsolete date. + { + "21 Nov 97 09:55:06 GMT", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("GMT", 0)), + }, + // Commonly found format not specified by RFC 5322. + { + "Fri, 21 Nov 1997 09:55:06 -0600 (MDT)", + time.Date(1997, 11, 21, 9, 55, 6, 0, time.FixedZone("", -6*60*60)), + }, + } + for _, test := range tests { + hdr := Header{ + "Date": []string{test.dateStr}, + } + date, err := hdr.Date() + if err != nil { + t.Errorf("Failed parsing %q: %v", test.dateStr, err) + continue + } + if !date.Equal(test.exp) { + t.Errorf("Parse of %q: got %+v, want %+v", test.dateStr, date, test.exp) + } + } +} + +func TestAddressParsingError(t *testing.T) { + const txt = "=?iso-8859-2?Q?Bogl=E1rka_Tak=E1cs?= <unknown@gmail.com>" + _, err := ParseAddress(txt) + if err == nil || !strings.Contains(err.Error(), "charset not supported") { + t.Errorf(`mail.ParseAddress(%q) err: %q, want ".*charset not supported.*"`, txt, err) + } +} + +func TestAddressParsing(t *testing.T) { + tests := []struct { + addrsStr string + exp []*Address + }{ + // Bare address + { + `jdoe@machine.example`, + []*Address{{ + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.1 + { + `John Doe <jdoe@machine.example>`, + []*Address{{ + Name: "John Doe", + Address: "jdoe@machine.example", + }}, + }, + // RFC 5322, Appendix A.1.2 + { + `"Joe Q. Public" <john.q.public@example.com>`, + []*Address{{ + Name: "Joe Q. Public", + Address: "john.q.public@example.com", + }}, + }, + { + `Mary Smith <mary@x.test>, jdoe@example.org, Who? <one@y.test>`, + []*Address{ + { + Name: "Mary Smith", + Address: "mary@x.test", + }, + { + Address: "jdoe@example.org", + }, + { + Name: "Who?", + Address: "one@y.test", + }, + }, + }, + { + `<boss@nil.test>, "Giant; \"Big\" Box" <sysservices@example.net>`, + []*Address{ + { + Address: "boss@nil.test", + }, + { + Name: `Giant; "Big" Box`, + Address: "sysservices@example.net", + }, + }, + }, + // RFC 5322, Appendix A.1.3 + // TODO(dsymonds): Group addresses. 
+ + // RFC 2047 "Q"-encoded ISO-8859-1 address. + { + `=?iso-8859-1?q?J=F6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded US-ASCII address. Dumb but legal. + { + `=?us-ascii?q?J=6Frg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jorg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047 "Q"-encoded UTF-8 address. + { + `=?utf-8?q?J=C3=B6rg_Doe?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg Doe`, + Address: "joerg@example.com", + }, + }, + }, + // RFC 2047, Section 8. + { + `=?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>`, + []*Address{ + { + Name: `André Pirard`, + Address: "PIRARD@vm1.ulg.ac.be", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded ISO-8859-1 address. + { + `=?ISO-8859-1?B?SvZyZw==?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example of RFC 2047 "B"-encoded UTF-8 address. + { + `=?UTF-8?B?SsO2cmc=?= <joerg@example.com>`, + []*Address{ + { + Name: `Jörg`, + Address: "joerg@example.com", + }, + }, + }, + // Custom example with "." in name. For issue 4938 + { + `Asem H. 
<noreply@example.com>`, + []*Address{ + { + Name: `Asem H.`, + Address: "noreply@example.com", + }, + }, + }, + } + for _, test := range tests { + if len(test.exp) == 1 { + addr, err := ParseAddress(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (single) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual([]*Address{addr}, test.exp) { + t.Errorf("Parse (single) of %q: got %+v, want %+v", test.addrsStr, addr, test.exp) + } + } + + addrs, err := ParseAddressList(test.addrsStr) + if err != nil { + t.Errorf("Failed parsing (list) %q: %v", test.addrsStr, err) + continue + } + if !reflect.DeepEqual(addrs, test.exp) { + t.Errorf("Parse (list) of %q: got %+v, want %+v", test.addrsStr, addrs, test.exp) + } + } +} + +func TestAddressFormatting(t *testing.T) { + tests := []struct { + addr *Address + exp string + }{ + { + &Address{Address: "bob@example.com"}, + "<bob@example.com>", + }, + { + &Address{Name: "Bob", Address: "bob@example.com"}, + `"Bob" <bob@example.com>`, + }, + { + // note the ö (o with an umlaut) + &Address{Name: "Böb", Address: "bob@example.com"}, + `=?utf-8?q?B=C3=B6b?= <bob@example.com>`, + }, + { + &Address{Name: "Bob Jane", Address: "bob@example.com"}, + `"Bob Jane" <bob@example.com>`, + }, + { + &Address{Name: "Böb Jacöb", Address: "bob@example.com"}, + `=?utf-8?q?B=C3=B6b_Jac=C3=B6b?= <bob@example.com>`, + }, + } + for _, test := range tests { + s := test.addr.String() + if s != test.exp { + t.Errorf("Address%+v.String() = %v, want %v", *test.addr, s, test.exp) + } + } +} |