src/mime/encodedword.go - go - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package mime

 import (
 	"bytes"
 	"encoding/base64"
 	"errors"
 	"fmt"
 	"io"
 	"strings"
 	"sync"
 	"unicode"
 	"unicode/utf8"
 )

 // A WordEncoder is an RFC 2047 encoded-word encoder. Its underlying byte is
 // the encoding letter that is written into the header of every encoded-word
 // it produces.
 type WordEncoder byte

 // Encoding schemes available to a WordEncoder.
 const (
 	// BEncoding selects the Base64 encoding scheme defined by RFC 2045.
 	BEncoding WordEncoder = 'b'
 	// QEncoding selects the Q-encoding scheme defined by RFC 2047.
 	QEncoding WordEncoder = 'q'
 )

 // errInvalidWord is returned when the input is not a well-formed RFC 2047
 // encoded-word.
 var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")

 // Encode returns s as an RFC 2047 encoded-word. A string made only of
 // printable ASCII characters and tabs is returned unchanged. charset is the
 // IANA name of the charset of s; it is case insensitive.
 func (e WordEncoder) Encode(charset, s string) string {
 	if needsEncoding(s) {
 		return e.encodeWord(charset, s)
 	}
 	return s
 }

 // needsEncoding reports whether s contains a rune, other than a tab, that
 // lies outside the printable ASCII range ' ' through '~'.
 func needsEncoding(s string) bool {
 	for _, r := range s {
 		if r == '\t' {
 			continue
 		}
 		if r < ' ' || r > '~' {
 			return true
 		}
 	}
 	return false
 }

 // encodeWord wraps s in encoded-word syntax. The payload is produced by the
 // base64 encoder when e is BEncoding and by the Q encoder for any other
 // value of e.
 func (e WordEncoder) encodeWord(charset, s string) string {
 	out := getBuffer()
 	defer putBuffer(out)

 	e.openWord(out, charset)
 	switch e {
 	case BEncoding:
 		e.bEncode(out, charset, s)
 	default:
 		e.qEncode(out, charset, s)
 	}
 	closeWord(out)

 	return out.String()
 }

 const (
 	// maxEncodedWordLen is the maximum length of an encoded-word, delimiters
 	// included. See RFC 2047, section 2.
 	maxEncodedWordLen = 75
 	// maxContentLen is how much encoded content fits in one encoded-word
 	// once the "=?UTF-8?q?" header (charset plus the one-letter encoding
 	// field) and the 2-byte "?=" footer are accounted for.
 	maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
 )

 // maxBase64Len is the largest number of raw bytes whose base64 encoding
 // fits in maxContentLen.
 var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)

 // bEncode writes the base64 encoding of s to buf. When charset is UTF-8 and
 // the encoded content does not fit in maxContentLen, the output is split
 // into several encoded-words, always on a rune boundary.
 func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
 	enc := base64.NewEncoder(base64.StdEncoding, buf)

 	// Only long UTF-8 content is split; anything else is written as a
 	// single encoded-word.
 	fitsInOneWord := base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen
 	if !isUTF8(charset) || fitsInOneWord {
 		io.WriteString(enc, s)
 		enc.Close()
 		return
 	}

 	chunkStart, chunkLen := 0, 0
 	for pos := 0; pos < len(s); {
 		// Multi-byte characters must not be split across encoded-words.
 		// See RFC 2047, section 5.3.
 		_, size := utf8.DecodeRuneInString(s[pos:])

 		if chunkLen+size > maxBase64Len {
 			// The rune does not fit: flush the current chunk, start a new
 			// encoded-word and let the rune open the next chunk.
 			io.WriteString(enc, s[chunkStart:pos])
 			enc.Close()
 			e.splitWord(buf, charset)
 			chunkStart, chunkLen = pos, size
 		} else {
 			chunkLen += size
 		}
 		pos += size
 	}
 	io.WriteString(enc, s[chunkStart:])
 	enc.Close()
 }

 // qEncode writes the Q encoding of s to buf. When charset is UTF-8 the
 // output is split into several encoded-words so that the content of each one
 // stays within maxContentLen; other charsets are written as a single word.
 func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
 	// We only split encoded-words when the charset is UTF-8.
 	if !isUTF8(charset) {
 		writeQString(buf, s)
 		return
 	}

 	wordLen := 0
 	for pos := 0; pos < len(s); {
 		// size is the number of input bytes taken by the next character and
 		// cost the number of output bytes it produces. Characters written
 		// as "=XX" cost three bytes per input byte and are kept together:
 		// multi-byte characters must not be split across encoded-words.
 		// See RFC 2047, section 5.3.
 		size, cost := 1, 1
 		if c := s[pos]; c < ' ' || c > '~' || c == '=' || c == '?' || c == '_' {
 			_, size = utf8.DecodeRuneInString(s[pos:])
 			cost = 3 * size
 		}

 		if wordLen+cost > maxContentLen {
 			e.splitWord(buf, charset)
 			wordLen = 0
 		}
 		writeQString(buf, s[pos:pos+size])
 		wordLen += cost
 		pos += size
 	}
 }

 // writeQString writes the Q encoding of s to buf: a space becomes '_',
 // printable ASCII other than '=', '?' and '_' is copied as is, and every
 // other byte is written as '=' followed by two upper-case hex digits.
 func writeQString(buf *bytes.Buffer, s string) {
 	for _, c := range []byte(s) {
 		if c == ' ' {
 			buf.WriteByte('_')
 			continue
 		}
 		if c < '!' || c > '~' || c == '=' || c == '?' || c == '_' {
 			buf.WriteByte('=')
 			buf.WriteByte(upperhex[c>>4])
 			buf.WriteByte(upperhex[c&0x0f])
 			continue
 		}
 		buf.WriteByte(c)
 	}
 }

 // openWord writes the header of an encoded-word, "=?charset?e?", into buf,
 // where e is the encoding letter held by the receiver.
 func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
 	buf.WriteString("=?")
 	buf.WriteString(charset)
 	buf.Write([]byte{'?', byte(e), '?'})
 }

 // closeWord writes the "?=" footer that terminates an encoded-word into buf.
 func closeWord(buf *bytes.Buffer) {
 	buf.WriteByte('?')
 	buf.WriteByte('=')
 }

 // splitWord ends the current encoded-word and starts a new one, separating
 // the two with a single space.
 func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
 	// Footer of the current word followed by the separating space.
 	buf.WriteString("?= ")
 	e.openWord(buf, charset)
 }

 // isUTF8 reports whether charset names UTF-8, ignoring case.
 func isUTF8(charset string) bool {
 	const name = "UTF-8"
 	return strings.EqualFold(charset, name)
 }

 // upperhex maps a 4-bit value to its upper-case hexadecimal digit.
 const upperhex = "0123456789ABCDEF"

 // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
 type WordDecoder struct {
 	// CharsetReader, if non-nil, defines a function to generate
 	// charset-conversion readers, converting from the provided
 	// charset into UTF-8.
 	// The charset name is always passed in lower case. The utf-8,
 	// iso-8859-1 and us-ascii charsets are handled by default, without
 	// calling CharsetReader.
 	// One of the CharsetReader's result values must be non-nil.
 	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
 }

 // Decode decodes a single RFC 2047 encoded-word of the form
 // "=?charset?encoding?text?=" and returns its text converted to UTF-8.
 //
 // It returns errInvalidWord when word is not delimited by "=?" and "?=",
 // does not contain exactly four '?', or has an encoding field that is not
 // exactly one byte long. Errors from decoding the text or from converting
 // the charset are returned unchanged.
 func (d *WordDecoder) Decode(word string) (string, error) {
 	if !strings.HasPrefix(word, "=?") || !strings.HasSuffix(word, "?=") || strings.Count(word, "?") != 4 {
 		return "", errInvalidWord
 	}
 	word = word[2 : len(word)-2]

 	// split delimits the first 2 fields. The count check above guarantees
 	// that two '?' remain in word, so split is never -1.
 	split := strings.IndexByte(word, '?')
 	// The field after split must be exactly one byte. The length is checked
 	// first so that input such as "=?a???=" (empty encoding at the very end
 	// of the word) is rejected instead of indexing past the end of word.
 	if len(word) < split+3 || word[split+2] != '?' {
 		return "", errInvalidWord
 	}

 	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
 	charset := word[:split]
 	encoding := word[split+1]
 	text := word[split+3:]

 	content, err := decode(encoding, text)
 	if err != nil {
 		return "", err
 	}

 	buf := getBuffer()
 	defer putBuffer(buf)

 	if err := d.convert(buf, charset, content); err != nil {
 		return "", err
 	}

 	return buf.String(), nil
 }

 // DecodeHeader decodes all encoded-words of the given string. Text that is
 // not part of a decodable encoded-word is copied to the result as is, except
 // that white space separating two adjacent encoded-words is dropped.
 // It returns an error only when the charset conversion of a decoded word
 // fails, that is when the charset is not handled by default and
 // CharsetReader of d is nil, returns an error, or yields a reader that
 // fails.
 func (d *WordDecoder) DecodeHeader(header string) (string, error) {
 	// If there is no encoded-word, returns before creating a buffer.
 	i := strings.Index(header, "=?")
 	if i == -1 {
 		return header, nil
 	}

 	buf := getBuffer()
 	defer putBuffer(buf)

 	// Everything before the first "=?" is plain text.
 	buf.WriteString(header[:i])
 	header = header[i:]

 	// betweenWords is true when the last thing written was a decoded
 	// encoded-word, so that white space before the next one can be dropped.
 	betweenWords := false
 	for {
 		// Each iteration parses one "=?charset?encoding?text?=" candidate
 		// from header. Every break below leaves the unparsed remainder in
 		// header, which is copied verbatim after the loop.
 		start := strings.Index(header, "=?")
 		if start == -1 {
 			break
 		}
 		cur := start + len("=?")

 		// The charset runs up to the next '?'.
 		i := strings.Index(header[cur:], "?")
 		if i == -1 {
 			break
 		}
 		charset := header[cur : cur+i]
 		cur += i + len("?")

 		// There must be room left for at least an encoding byte, a '?' and
 		// the closing "?=".
 		if len(header) < cur+len("Q??=") {
 			break
 		}
 		encoding := header[cur]
 		cur++

 		if header[cur] != '?' {
 			break
 		}
 		cur++

 		// The encoded text runs up to the closing "?=".
 		j := strings.Index(header[cur:], "?=")
 		if j == -1 {
 			break
 		}
 		text := header[cur : cur+j]
 		end := cur + j + len("?=")

 		content, err := decode(encoding, text)
 		if err != nil {
 			// Not a decodable encoded-word: emit everything up to and
 			// including this "=?" as plain text and resume scanning right
 			// after it.
 			betweenWords = false
 			buf.WriteString(header[:start+2])
 			header = header[start+2:]
 			continue
 		}

 		// Write characters before the encoded-word. White-space and newline
 		// characters separating two encoded-words must be deleted.
 		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
 			buf.WriteString(header[:start])
 		}

 		if err := d.convert(buf, charset, content); err != nil {
 			return "", err
 		}

 		header = header[end:]
 		betweenWords = true
 	}

 	// Whatever could not be parsed as an encoded-word is kept unchanged.
 	if len(header) > 0 {
 		buf.WriteString(header)
 	}

 	return buf.String(), nil
 }

 // decode decodes text according to the encoding letter of an encoded-word:
 // 'B' or 'b' selects base64 and 'Q' or 'q' selects Q encoding. Any other
 // letter yields errInvalidWord.
 func decode(encoding byte, text string) ([]byte, error) {
 	if encoding == 'B' || encoding == 'b' {
 		return base64.StdEncoding.DecodeString(text)
 	}
 	if encoding == 'Q' || encoding == 'q' {
 		return qDecode(text)
 	}
 	return nil, errInvalidWord
 }

 // convert appends content, interpreted in the given charset, to buf as
 // UTF-8. The utf-8, iso-8859-1 and us-ascii charsets (matched ignoring case)
 // are handled directly; any other charset is delegated to d.CharsetReader,
 // which receives the lower-cased charset name. An error is returned when the
 // charset is not built in and CharsetReader is nil, or when CharsetReader or
 // the reader it returns fails.
 func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
 	if strings.EqualFold("utf-8", charset) {
 		buf.Write(content)
 		return nil
 	}

 	if strings.EqualFold("iso-8859-1", charset) {
 		// Each byte is written as the rune with the same value.
 		for _, b := range content {
 			buf.WriteRune(rune(b))
 		}
 		return nil
 	}

 	if strings.EqualFold("us-ascii", charset) {
 		// Bytes outside the ASCII range are replaced by U+FFFD.
 		for _, b := range content {
 			if b < utf8.RuneSelf {
 				buf.WriteByte(b)
 				continue
 			}
 			buf.WriteRune(unicode.ReplacementChar)
 		}
 		return nil
 	}

 	if d.CharsetReader == nil {
 		return fmt.Errorf("mime: unhandled charset %q", charset)
 	}
 	r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
 	if err != nil {
 		return err
 	}
 	_, err = buf.ReadFrom(r)
 	return err
 }

 // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
 // one character other than space, tab, line feed or carriage return.
 func hasNonWhitespace(s string) bool {
 	for _, r := range s {
 		// Encoded-words can only be separated by linear white space, which
 		// does not include vertical tabs (\v).
 		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
 			return true
 		}
 	}
 	return false
 }

 // qDecode decodes a Q encoded string: '_' becomes a space, "=XX" becomes the
 // byte with hexadecimal value XX, and printable ASCII, tab, CR and LF are
 // copied as is. It returns errInvalidWord for any other byte or for an '='
 // that is not followed by two more bytes, and the error of readHexByte when
 // those two bytes are not hexadecimal digits.
 func qDecode(s string) ([]byte, error) {
 	out := make([]byte, 0, len(s))
 	for i := 0; i < len(s); i++ {
 		c := s[i]
 		switch {
 		case c == '_':
 			out = append(out, ' ')
 		case c == '=':
 			// An escape needs two more bytes after the '='.
 			if len(s)-i < 3 {
 				return nil, errInvalidWord
 			}
 			b, err := readHexByte(s[i+1], s[i+2])
 			if err != nil {
 				return nil, err
 			}
 			out = append(out, b)
 			i += 2
 		case c >= ' ' && c <= '~', c == '\n', c == '\r', c == '\t':
 			out = append(out, c)
 		default:
 			return nil, errInvalidWord
 		}
 	}

 	return out, nil
 }

 // readHexByte returns the byte whose quoted-printable representation is the
 // pair of hexadecimal digits a (high nibble) and b (low nibble).
 func readHexByte(a, b byte) (byte, error) {
 	hi, err := fromHex(a)
 	if err != nil {
 		return 0, err
 	}
 	lo, err := fromHex(b)
 	if err != nil {
 		return 0, err
 	}
 	return hi<<4 | lo, nil
 }

 // fromHex returns the value of the hexadecimal digit b, or an error when b
 // is not a hexadecimal digit.
 func fromHex(b byte) (byte, error) {
 	if '0' <= b && b <= '9' {
 		return b - '0', nil
 	}
 	if 'A' <= b && b <= 'F' {
 		return b - 'A' + 10, nil
 	}
 	// Accept badly encoded bytes: lower-case digits are tolerated.
 	if 'a' <= b && b <= 'f' {
 		return b - 'a' + 10, nil
 	}
 	return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
 }

 // bufPool holds the *bytes.Buffer values handed out by getBuffer and given
 // back by putBuffer.
 var bufPool = sync.Pool{
 	New: func() interface{} {
 		return &bytes.Buffer{}
 	},
 }

 // getBuffer takes a buffer from bufPool.
 func getBuffer() *bytes.Buffer {
 	buf := bufPool.Get().(*bytes.Buffer)
 	return buf
 }

 // putBuffer resets buf and gives it back to bufPool. A buffer holding more
 // than 1024 bytes is not returned to the pool.
 func putBuffer(buf *bytes.Buffer) {
 	if buf.Len() <= 1024 {
 		buf.Reset()
 		bufPool.Put(buf)
 	}
 }
	// Copyright 2015 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package mime

	import (
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"strings"
	"sync"
	"unicode"
	"unicode/utf8"
	)

	// A WordEncoder is an RFC 2047 encoded-word encoder. Its underlying byte is
	// the encoding letter that is written into the header of every encoded-word
	// it produces.
	type WordEncoder byte

	// Encoding schemes available to a WordEncoder.
	const (
		// BEncoding selects the Base64 encoding scheme defined by RFC 2045.
		BEncoding WordEncoder = 'b'
		// QEncoding selects the Q-encoding scheme defined by RFC 2047.
		QEncoding WordEncoder = 'q'
	)

	// errInvalidWord is returned when the input is not a well-formed RFC 2047
	// encoded-word.
	var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")

	// Encode returns s as an RFC 2047 encoded-word. A string made only of
	// printable ASCII characters and tabs is returned unchanged. charset is the
	// IANA name of the charset of s; it is case insensitive.
	func (e WordEncoder) Encode(charset, s string) string {
		if needsEncoding(s) {
			return e.encodeWord(charset, s)
		}
		return s
	}

	// needsEncoding reports whether s contains a rune, other than a tab, that
	// lies outside the printable ASCII range ' ' through '~'.
	func needsEncoding(s string) bool {
		for _, r := range s {
			if (r < ' ' || r > '~') && r != '\t' {
				return true
			}
		}
		return false
	}

	// encodeWord wraps s in encoded-word syntax. The payload is produced by the
	// base64 encoder when e is BEncoding and by the Q encoder for any other
	// value of e.
	func (e WordEncoder) encodeWord(charset, s string) string {
		out := getBuffer()
		defer putBuffer(out)

		e.openWord(out, charset)
		switch e {
		case BEncoding:
			e.bEncode(out, charset, s)
		default:
			e.qEncode(out, charset, s)
		}
		closeWord(out)

		return out.String()
	}

	const (
		// maxEncodedWordLen is the maximum length of an encoded-word, delimiters
		// included. See RFC 2047, section 2.
		maxEncodedWordLen = 75
		// maxContentLen is how much encoded content fits in one encoded-word
		// once the "=?UTF-8?q?" header (charset plus the one-letter encoding
		// field) and the 2-byte "?=" footer are accounted for.
		maxContentLen = maxEncodedWordLen - len("=?UTF-8?q?") - len("?=")
	)

	// maxBase64Len is the largest number of raw bytes whose base64 encoding
	// fits in maxContentLen.
	var maxBase64Len = base64.StdEncoding.DecodedLen(maxContentLen)

	// bEncode encodes s using base64 encoding and writes it to buf.
	func (e WordEncoder) bEncode(buf *bytes.Buffer, charset, s string) {
	w := base64.NewEncoder(base64.StdEncoding, buf)
	// If the charset is not UTF-8 or if the content is short, do not bother
	// splitting the encoded-word.
	if !isUTF8(charset) \|\| base64.StdEncoding.EncodedLen(len(s)) <= maxContentLen {
	io.WriteString(w, s)
	w.Close()
	return
	}

	var currentLen, last, runeLen int
	for i := 0; i < len(s); i += runeLen {
	// Multi-byte characters must not be split accross encoded-words.
	// See RFC 2047, section 5.3.
	_, runeLen = utf8.DecodeRuneInString(s[i:])

	if currentLen+runeLen <= maxBase64Len {
	currentLen += runeLen
	} else {
	io.WriteString(w, s[last:i])
	w.Close()
	e.splitWord(buf, charset)
	last = i
	currentLen = runeLen
	}
	}
	io.WriteString(w, s[last:])
	w.Close()
	}

	// qEncode writes the Q encoding of s to buf. When charset is UTF-8 the
	// output is split into several encoded-words so that the content of each one
	// stays within maxContentLen; other charsets are written as a single word.
	func (e WordEncoder) qEncode(buf *bytes.Buffer, charset, s string) {
		// We only split encoded-words when the charset is UTF-8.
		if !isUTF8(charset) {
			writeQString(buf, s)
			return
		}

		wordLen := 0
		for pos := 0; pos < len(s); {
			// size is the number of input bytes taken by the next character and
			// cost the number of output bytes it produces. Characters written
			// as "=XX" cost three bytes per input byte and are kept together:
			// multi-byte characters must not be split across encoded-words.
			// See RFC 2047, section 5.3.
			size, cost := 1, 1
			if c := s[pos]; c < ' ' || c > '~' || c == '=' || c == '?' || c == '_' {
				_, size = utf8.DecodeRuneInString(s[pos:])
				cost = 3 * size
			}

			if wordLen+cost > maxContentLen {
				e.splitWord(buf, charset)
				wordLen = 0
			}
			writeQString(buf, s[pos:pos+size])
			wordLen += cost
			pos += size
		}
	}

	// writeQString writes the Q encoding of s to buf: a space becomes '_',
	// printable ASCII other than '=', '?' and '_' is copied as is, and every
	// other byte is written as '=' followed by two upper-case hex digits.
	func writeQString(buf *bytes.Buffer, s string) {
		for _, c := range []byte(s) {
			if c == ' ' {
				buf.WriteByte('_')
				continue
			}
			if c < '!' || c > '~' || c == '=' || c == '?' || c == '_' {
				buf.WriteByte('=')
				buf.WriteByte(upperhex[c>>4])
				buf.WriteByte(upperhex[c&0x0f])
				continue
			}
			buf.WriteByte(c)
		}
	}

	// openWord writes the header of an encoded-word, "=?charset?e?", into buf,
	// where e is the encoding letter held by the receiver.
	func (e WordEncoder) openWord(buf *bytes.Buffer, charset string) {
		buf.WriteString("=?")
		buf.WriteString(charset)
		buf.Write([]byte{'?', byte(e), '?'})
	}

	// closeWord writes the "?=" footer that terminates an encoded-word into buf.
	func closeWord(buf *bytes.Buffer) {
		buf.WriteByte('?')
		buf.WriteByte('=')
	}

	// splitWord ends the current encoded-word and starts a new one, separating
	// the two with a single space.
	func (e WordEncoder) splitWord(buf *bytes.Buffer, charset string) {
		// Footer of the current word followed by the separating space.
		buf.WriteString("?= ")
		e.openWord(buf, charset)
	}

	// isUTF8 reports whether charset names UTF-8, ignoring case.
	func isUTF8(charset string) bool {
		const name = "UTF-8"
		return strings.EqualFold(charset, name)
	}

	// upperhex maps a 4-bit value to its upper-case hexadecimal digit.
	const upperhex = "0123456789ABCDEF"

	// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
	type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// The charset name is always passed in lower case. The utf-8,
	// iso-8859-1 and us-ascii charsets are handled by default, without
	// calling CharsetReader.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
	}

	// Decode decodes an RFC 2047 encoded-word.
	func (d *WordDecoder) Decode(word string) (string, error) {
	if !strings.HasPrefix(word, "=?") \|\| !strings.HasSuffix(word, "?=") \|\| strings.Count(word, "?") != 4 {
	return "", errInvalidWord
	}
	word = word[2 : len(word)-2]

	// split delimits the first 2 fields
	split := strings.IndexByte(word, '?')
	// the field after split must only be one byte
	if word[split+2] != '?' {
	return "", errInvalidWord
	}

	// split word "UTF-8?q?ascii" into "UTF-8", 'q', and "ascii"
	charset := word[:split]
	encoding := word[split+1]
	text := word[split+3:]

	content, err := decode(encoding, text)
	if err != nil {
	return "", err
	}

	buf := getBuffer()
	defer putBuffer(buf)

	if err := d.convert(buf, charset, content); err != nil {
	return "", err
	}

	return buf.String(), nil
	}

	// DecodeHeader decodes all encoded-words of the given string. It returns an
	// error if and only if CharsetReader of d returns an error.
	func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
	return header, nil
	}

	buf := getBuffer()
	defer putBuffer(buf)

	buf.WriteString(header[:i])
	header = header[i:]

	betweenWords := false
	for {
	start := strings.Index(header, "=?")
	if start == -1 {
	break
	}
	cur := start + len("=?")

	i := strings.Index(header[cur:], "?")
	if i == -1 {
	break
	}
	charset := header[cur : cur+i]
	cur += i + len("?")

	if len(header) < cur+len("Q??=") {
	break
	}
	encoding := header[cur]
	cur++

	if header[cur] != '?' {
	break
	}
	cur++

	j := strings.Index(header[cur:], "?=")
	if j == -1 {
	break
	}
	text := header[cur : cur+j]
	end := cur + j + len("?=")

	content, err := decode(encoding, text)
	if err != nil {
	betweenWords = false
	buf.WriteString(header[:start+2])
	header = header[start+2:]
	continue
	}

	// Write characters before the encoded-word. White-space and newline
	// characters separating two encoded-words must be deleted.
	if start > 0 && (!betweenWords \|\| hasNonWhitespace(header[:start])) {
	buf.WriteString(header[:start])
	}

	if err := d.convert(buf, charset, content); err != nil {
	return "", err
	}

	header = header[end:]
	betweenWords = true
	}

	if len(header) > 0 {
	buf.WriteString(header)
	}

	return buf.String(), nil
	}

	// decode decodes text according to the encoding letter of an encoded-word:
	// 'B' or 'b' selects base64 and 'Q' or 'q' selects Q encoding. Any other
	// letter yields errInvalidWord.
	func decode(encoding byte, text string) ([]byte, error) {
		if encoding == 'B' || encoding == 'b' {
			return base64.StdEncoding.DecodeString(text)
		}
		if encoding == 'Q' || encoding == 'q' {
			return qDecode(text)
		}
		return nil, errInvalidWord
	}

	// convert appends content, interpreted in the given charset, to buf as
	// UTF-8. The utf-8, iso-8859-1 and us-ascii charsets (matched ignoring case)
	// are handled directly; any other charset is delegated to d.CharsetReader,
	// which receives the lower-cased charset name. An error is returned when the
	// charset is not built in and CharsetReader is nil, or when CharsetReader or
	// the reader it returns fails.
	//
	// buf is taken by pointer so that the converted text lands in the caller's
	// buffer rather than in a copy of it.
	func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
		switch {
		case strings.EqualFold("utf-8", charset):
			buf.Write(content)
		case strings.EqualFold("iso-8859-1", charset):
			// Each byte is written as the rune with the same value.
			for _, c := range content {
				buf.WriteRune(rune(c))
			}
		case strings.EqualFold("us-ascii", charset):
			// Bytes outside the ASCII range are replaced by U+FFFD.
			for _, c := range content {
				if c >= utf8.RuneSelf {
					buf.WriteRune(unicode.ReplacementChar)
				} else {
					buf.WriteByte(c)
				}
			}
		default:
			if d.CharsetReader == nil {
				return fmt.Errorf("mime: unhandled charset %q", charset)
			}
			r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
			if err != nil {
				return err
			}
			if _, err = buf.ReadFrom(r); err != nil {
				return err
			}
		}
		return nil
	}

	// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
	// one character other than space, tab, line feed or carriage return.
	func hasNonWhitespace(s string) bool {
		for _, r := range s {
			// Encoded-words can only be separated by linear white space, which
			// does not include vertical tabs (\v).
			if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
				return true
			}
		}
		return false
	}

	// qDecode decodes a Q encoded string.
	func qDecode(s string) ([]byte, error) {
	dec := make([]byte, len(s))
	n := 0
	for i := 0; i < len(s); i++ {
	switch c := s[i]; {
	case c == '_':
	dec[n] = ' '
	case c == '=':
	if i+2 >= len(s) {
	return nil, errInvalidWord
	}
	b, err := readHexByte(s[i+1], s[i+2])
	if err != nil {
	return nil, err
	}
	dec[n] = b
	i += 2
	case (c <= '~' && c >= ' ') \|\| c == '\n' \|\| c == '\r' \|\| c == '\t':
	dec[n] = c
	default:
	return nil, errInvalidWord
	}
	n++
	}

	return dec[:n], nil
	}

	// readHexByte returns the byte from its quoted-printable representation.
	func readHexByte(a, b byte) (byte, error) {
	var hb, lb byte
	var err error
	if hb, err = fromHex(a); err != nil {
	return 0, err
	}
	if lb, err = fromHex(b); err != nil {
	return 0, err
	}
	return hb<<4 \| lb, nil
	}

	// fromHex returns the value of the hexadecimal digit b, or an error when b
	// is not a hexadecimal digit.
	func fromHex(b byte) (byte, error) {
		if '0' <= b && b <= '9' {
			return b - '0', nil
		}
		if 'A' <= b && b <= 'F' {
			return b - 'A' + 10, nil
		}
		// Accept badly encoded bytes: lower-case digits are tolerated.
		if 'a' <= b && b <= 'f' {
			return b - 'a' + 10, nil
		}
		return 0, fmt.Errorf("mime: invalid hex byte %#02x", b)
	}

	// bufPool holds the *bytes.Buffer values handed out by getBuffer and given
	// back by putBuffer.
	var bufPool = sync.Pool{
		New: func() interface{} {
			return &bytes.Buffer{}
		},
	}

	// getBuffer takes a buffer from bufPool.
	func getBuffer() *bytes.Buffer {
		buf := bufPool.Get().(*bytes.Buffer)
		return buf
	}

	// putBuffer resets buf and gives it back to bufPool. A buffer holding more
	// than 1024 bytes is not returned to the pool.
	func putBuffer(buf *bytes.Buffer) {
		if buf.Len() <= 1024 {
			buf.Reset()
			bufPool.Put(buf)
		}
	}