| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package mime |
| |
| import ( |
| "bytes" |
| "encoding/base64" |
| "errors" |
| "fmt" |
| "io" |
| "strings" |
| "sync" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
// WordEncoder is an RFC 2047 encoded-word encoder. Its value is the byte
// that identifies the encoding scheme inside the encoded-word.
type WordEncoder byte

const (
	// BEncoding selects the Base64 encoding scheme defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding selects the Q-encoding scheme defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)
| |
// errInvalidWord is returned when the input is not a well-formed RFC 2047
// encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
| |
| // Encode returns the encoded-word form of s. If s is ASCII without special |
| // characters, it is returned unchanged. The provided charset is the IANA |
| // charset name of s. It is case insensitive. |
| func (e WordEncoder) Encode(charset, s string) string { |
| if !needsEncoding(s) { |
| return s |
| } |
| return e.encodeWord(charset, s) |
| } |
| |
// needsEncoding reports whether s contains anything other than printable
// ASCII (' ' through '~') and horizontal tabs.
func needsEncoding(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '\t' {
			continue
		}
		// Every byte of a multi-byte UTF-8 sequence is above '~', so a
		// byte-wise scan flags non-ASCII text as well.
		if c < ' ' || c > '~' {
			return true
		}
	}
	return false
}
| |
| // encodeWord encodes a string into an encoded-word. |
| func (e WordEncoder) encodeWord(charset, s string) string { |
| buf := getBuffer() |
| defer putBuffer(buf) |
| |
| buf.WriteString("=?") |
| buf.WriteString(charset) |
| buf.WriteByte('?') |
| buf.WriteByte(byte(e)) |
| buf.WriteByte('?') |
| |
| if e == BEncoding { |
| w := base64.NewEncoder(base64.StdEncoding, buf) |
| io.WriteString(w, s) |
| w.Close() |
| } else { |
| enc := make([]byte, 3) |
| for i := 0; i < len(s); i++ { |
| b := s[i] |
| switch { |
| case b == ' ': |
| buf.WriteByte('_') |
| case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_': |
| buf.WriteByte(b) |
| default: |
| enc[0] = '=' |
| enc[1] = upperhex[b>>4] |
| enc[2] = upperhex[b&0x0f] |
| buf.Write(enc) |
| } |
| } |
| } |
| buf.WriteString("?=") |
| return buf.String() |
| } |
| |
| const upperhex = "0123456789ABCDEF" |
| |
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// The charset name is always passed in lower case. The utf-8, iso-8859-1
	// and us-ascii charsets are handled by default, without calling
	// CharsetReader.
	// At least one of CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
| |
| // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word, |
| // word is returned unchanged. |
| func (d *WordDecoder) Decode(word string) (string, error) { |
| fields := strings.Split(word, "?") // TODO: remove allocation? |
| if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 { |
| return "", errInvalidWord |
| } |
| |
| content, err := decode(fields[2][0], fields[3]) |
| if err != nil { |
| return "", err |
| } |
| |
| buf := getBuffer() |
| defer putBuffer(buf) |
| |
| if err := d.convert(buf, fields[1], content); err != nil { |
| return "", err |
| } |
| |
| return buf.String(), nil |
| } |
| |
| // DecodeHeader decodes all encoded-words of the given string. It returns an |
| // error if and only if CharsetReader of d returns an error. |
| func (d *WordDecoder) DecodeHeader(header string) (string, error) { |
| // If there is no encoded-word, returns before creating a buffer. |
| i := strings.Index(header, "=?") |
| if i == -1 { |
| return header, nil |
| } |
| |
| buf := getBuffer() |
| defer putBuffer(buf) |
| |
| buf.WriteString(header[:i]) |
| header = header[i:] |
| |
| betweenWords := false |
| for { |
| start := strings.Index(header, "=?") |
| if start == -1 { |
| break |
| } |
| cur := start + len("=?") |
| |
| i := strings.Index(header[cur:], "?") |
| if i == -1 { |
| break |
| } |
| charset := header[cur : cur+i] |
| cur += i + len("?") |
| |
| if len(header) < cur+len("Q??=") { |
| break |
| } |
| encoding := header[cur] |
| cur++ |
| |
| if header[cur] != '?' { |
| break |
| } |
| cur++ |
| |
| j := strings.Index(header[cur:], "?=") |
| if j == -1 { |
| break |
| } |
| text := header[cur : cur+j] |
| end := cur + j + len("?=") |
| |
| content, err := decode(encoding, text) |
| if err != nil { |
| betweenWords = false |
| buf.WriteString(header[:start+2]) |
| header = header[start+2:] |
| continue |
| } |
| |
| // Write characters before the encoded-word. White-space and newline |
| // characters separating two encoded-words must be deleted. |
| if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { |
| buf.WriteString(header[:start]) |
| } |
| |
| if err := d.convert(buf, charset, content); err != nil { |
| return "", err |
| } |
| |
| header = header[end:] |
| betweenWords = true |
| } |
| |
| if len(header) > 0 { |
| buf.WriteString(header) |
| } |
| |
| return buf.String(), nil |
| } |
| |
| func decode(encoding byte, text string) ([]byte, error) { |
| switch encoding { |
| case 'B', 'b': |
| return base64.StdEncoding.DecodeString(text) |
| case 'Q', 'q': |
| return qDecode(text) |
| default: |
| return nil, errInvalidWord |
| } |
| } |
| |
| func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { |
| switch { |
| case strings.EqualFold("utf-8", charset): |
| buf.Write(content) |
| case strings.EqualFold("iso-8859-1", charset): |
| for _, c := range content { |
| buf.WriteRune(rune(c)) |
| } |
| case strings.EqualFold("us-ascii", charset): |
| for _, c := range content { |
| if c >= utf8.RuneSelf { |
| buf.WriteRune(unicode.ReplacementChar) |
| } else { |
| buf.WriteByte(c) |
| } |
| } |
| default: |
| if d.CharsetReader == nil { |
| return fmt.Errorf("mime: unhandled charset %q", charset) |
| } |
| r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) |
| if err != nil { |
| return err |
| } |
| if _, err = buf.ReadFrom(r); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one byte that is not a space, tab, line feed or carriage return.
//
// Encoded-words can only be separated by linear white space, which does not
// include vertical tabs (\v).
func hasNonWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' {
			return true
		}
	}
	return false
}
| |
| // qDecode decodes a Q encoded string. |
| func qDecode(s string) ([]byte, error) { |
| dec := make([]byte, len(s)) |
| n := 0 |
| for i := 0; i < len(s); i++ { |
| switch c := s[i]; { |
| case c == '_': |
| dec[n] = ' ' |
| case c == '=': |
| if i+2 >= len(s) { |
| return nil, errInvalidWord |
| } |
| b, err := readHexByte(s[i+1], s[i+2]) |
| if err != nil { |
| return nil, err |
| } |
| dec[n] = b |
| i += 2 |
| case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': |
| dec[n] = c |
| default: |
| return nil, errInvalidWord |
| } |
| n++ |
| } |
| |
| return dec[:n], nil |
| } |
| |
| // readHexByte returns the byte from its quoted-printable representation. |
| func readHexByte(a, b byte) (byte, error) { |
| var hb, lb byte |
| var err error |
| if hb, err = fromHex(a); err != nil { |
| return 0, err |
| } |
| if lb, err = fromHex(b); err != nil { |
| return 0, err |
| } |
| return hb<<4 | lb, nil |
| } |
| |
// fromHex returns the value of the hexadecimal digit b. Lower-case digits
// are accepted in addition to upper-case ones, so that badly encoded input
// is still decoded. Any other byte is an error.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return b - 'A' + 10, nil
	}
	if 'a' <= b && b <= 'f' {
		return b - 'a' + 10, nil
	}
	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
}
| |
// bufPool holds reusable *bytes.Buffer values; an empty pool hands out a
// freshly allocated buffer.
var bufPool = sync.Pool{
	New: func() interface{} { return &bytes.Buffer{} },
}
| |
| func getBuffer() *bytes.Buffer { |
| return bufPool.Get().(*bytes.Buffer) |
| } |
| |
| func putBuffer(buf *bytes.Buffer) { |
| if buf.Len() > 1024 { |
| return |
| } |
| buf.Reset() |
| bufPool.Put(buf) |
| } |