| // Copyright 2010 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package mime |
| |
| import ( |
| "errors" |
| "fmt" |
| "sort" |
| "strings" |
| "unicode" |
| ) |
| |
| // FormatMediaType serializes mediatype t and the parameters |
| // param as a media type conforming to RFC 2045 and RFC 2616. |
| // The type and parameter names are written in lower-case. |
| // When any of the arguments result in a standard violation then |
| // FormatMediaType returns the empty string. |
| func FormatMediaType(t string, param map[string]string) string { |
| var b strings.Builder |
| if slash := strings.IndexByte(t, '/'); slash == -1 { |
| if !isToken(t) { |
| return "" |
| } |
| b.WriteString(strings.ToLower(t)) |
| } else { |
| major, sub := t[:slash], t[slash+1:] |
| if !isToken(major) || !isToken(sub) { |
| return "" |
| } |
| b.WriteString(strings.ToLower(major)) |
| b.WriteByte('/') |
| b.WriteString(strings.ToLower(sub)) |
| } |
| |
| attrs := make([]string, 0, len(param)) |
| for a := range param { |
| attrs = append(attrs, a) |
| } |
| sort.Strings(attrs) |
| |
| for _, attribute := range attrs { |
| value := param[attribute] |
| b.WriteByte(';') |
| b.WriteByte(' ') |
| if !isToken(attribute) { |
| return "" |
| } |
| b.WriteString(strings.ToLower(attribute)) |
| |
| needEnc := needsEncoding(value) |
| if needEnc { |
| // RFC 2231 section 4 |
| b.WriteByte('*') |
| } |
| b.WriteByte('=') |
| |
| if needEnc { |
| b.WriteString("utf-8''") |
| |
| offset := 0 |
| for index := 0; index < len(value); index++ { |
| ch := value[index] |
| // {RFC 2231 section 7} |
| // attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials> |
| if ch <= ' ' || ch >= 0x7F || |
| ch == '*' || ch == '\'' || ch == '%' || |
| isTSpecial(rune(ch)) { |
| |
| b.WriteString(value[offset:index]) |
| offset = index + 1 |
| |
| b.WriteByte('%') |
| b.WriteByte(upperhex[ch>>4]) |
| b.WriteByte(upperhex[ch&0x0F]) |
| } |
| } |
| b.WriteString(value[offset:]) |
| continue |
| } |
| |
| if isToken(value) { |
| b.WriteString(value) |
| continue |
| } |
| |
| b.WriteByte('"') |
| offset := 0 |
| for index := 0; index < len(value); index++ { |
| character := value[index] |
| if character == '"' || character == '\\' { |
| b.WriteString(value[offset:index]) |
| offset = index |
| b.WriteByte('\\') |
| } |
| } |
| b.WriteString(value[offset:]) |
| b.WriteByte('"') |
| } |
| return b.String() |
| } |
| |
| func checkMediaTypeDisposition(s string) error { |
| typ, rest := consumeToken(s) |
| if typ == "" { |
| return errors.New("mime: no media type") |
| } |
| if rest == "" { |
| return nil |
| } |
| if !strings.HasPrefix(rest, "/") { |
| return errors.New("mime: expected slash after first token") |
| } |
| subtype, rest := consumeToken(rest[1:]) |
| if subtype == "" { |
| return errors.New("mime: expected token after slash") |
| } |
| if rest != "" { |
| return errors.New("mime: unexpected content after media subtype") |
| } |
| return nil |
| } |
| |
| // ErrInvalidMediaParameter is returned by ParseMediaType if |
| // the media type value was found but there was an error parsing |
| // the optional parameters |
| var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter") |
| |
| // ParseMediaType parses a media type value and any optional |
| // parameters, per RFC 1521. Media types are the values in |
| // Content-Type and Content-Disposition headers (RFC 2183). |
| // On success, ParseMediaType returns the media type converted |
| // to lowercase and trimmed of white space and a non-nil map. |
| // If there is an error parsing the optional parameter, |
| // the media type will be returned along with the error |
| // ErrInvalidMediaParameter. |
| // The returned map, params, maps from the lowercase |
| // attribute to the attribute value with its case preserved. |
| func ParseMediaType(v string) (mediatype string, params map[string]string, err error) { |
| i := strings.Index(v, ";") |
| if i == -1 { |
| i = len(v) |
| } |
| mediatype = strings.TrimSpace(strings.ToLower(v[0:i])) |
| |
| err = checkMediaTypeDisposition(mediatype) |
| if err != nil { |
| return "", nil, err |
| } |
| |
| params = make(map[string]string) |
| |
| // Map of base parameter name -> parameter name -> value |
| // for parameters containing a '*' character. |
| // Lazily initialized. |
| var continuation map[string]map[string]string |
| |
| v = v[i:] |
| for len(v) > 0 { |
| v = strings.TrimLeftFunc(v, unicode.IsSpace) |
| if len(v) == 0 { |
| break |
| } |
| key, value, rest := consumeMediaParam(v) |
| if key == "" { |
| if strings.TrimSpace(rest) == ";" { |
| // Ignore trailing semicolons. |
| // Not an error. |
| return |
| } |
| // Parse error. |
| return mediatype, nil, ErrInvalidMediaParameter |
| } |
| |
| pmap := params |
| if idx := strings.Index(key, "*"); idx != -1 { |
| baseName := key[:idx] |
| if continuation == nil { |
| continuation = make(map[string]map[string]string) |
| } |
| var ok bool |
| if pmap, ok = continuation[baseName]; !ok { |
| continuation[baseName] = make(map[string]string) |
| pmap = continuation[baseName] |
| } |
| } |
| if _, exists := pmap[key]; exists { |
| // Duplicate parameter name is bogus. |
| return "", nil, errors.New("mime: duplicate parameter name") |
| } |
| pmap[key] = value |
| v = rest |
| } |
| |
| // Stitch together any continuations or things with stars |
| // (i.e. RFC 2231 things with stars: "foo*0" or "foo*") |
| var buf strings.Builder |
| for key, pieceMap := range continuation { |
| singlePartKey := key + "*" |
| if v, ok := pieceMap[singlePartKey]; ok { |
| if decv, ok := decode2231Enc(v); ok { |
| params[key] = decv |
| } |
| continue |
| } |
| |
| buf.Reset() |
| valid := false |
| for n := 0; ; n++ { |
| simplePart := fmt.Sprintf("%s*%d", key, n) |
| if v, ok := pieceMap[simplePart]; ok { |
| valid = true |
| buf.WriteString(v) |
| continue |
| } |
| encodedPart := simplePart + "*" |
| v, ok := pieceMap[encodedPart] |
| if !ok { |
| break |
| } |
| valid = true |
| if n == 0 { |
| if decv, ok := decode2231Enc(v); ok { |
| buf.WriteString(decv) |
| } |
| } else { |
| decv, _ := percentHexUnescape(v) |
| buf.WriteString(decv) |
| } |
| } |
| if valid { |
| params[key] = buf.String() |
| } |
| } |
| |
| return |
| } |
| |
| func decode2231Enc(v string) (string, bool) { |
| sv := strings.SplitN(v, "'", 3) |
| if len(sv) != 3 { |
| return "", false |
| } |
| // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll |
| // need to decide how to expose it in the API. But I'm not sure |
| // anybody uses it in practice. |
| charset := strings.ToLower(sv[0]) |
| if len(charset) == 0 { |
| return "", false |
| } |
| if charset != "us-ascii" && charset != "utf-8" { |
| // TODO: unsupported encoding |
| return "", false |
| } |
| encv, err := percentHexUnescape(sv[2]) |
| if err != nil { |
| return "", false |
| } |
| return encv, true |
| } |
| |
| func isNotTokenChar(r rune) bool { |
| return !isTokenChar(r) |
| } |
| |
| // consumeToken consumes a token from the beginning of provided |
| // string, per RFC 2045 section 5.1 (referenced from 2183), and return |
| // the token consumed and the rest of the string. Returns ("", v) on |
| // failure to consume at least one character. |
| func consumeToken(v string) (token, rest string) { |
| notPos := strings.IndexFunc(v, isNotTokenChar) |
| if notPos == -1 { |
| return v, "" |
| } |
| if notPos == 0 { |
| return "", v |
| } |
| return v[0:notPos], v[notPos:] |
| } |
| |
| // consumeValue consumes a "value" per RFC 2045, where a value is |
| // either a 'token' or a 'quoted-string'. On success, consumeValue |
| // returns the value consumed (and de-quoted/escaped, if a |
| // quoted-string) and the rest of the string. On failure, returns |
| // ("", v). |
| func consumeValue(v string) (value, rest string) { |
| if v == "" { |
| return |
| } |
| if v[0] != '"' { |
| return consumeToken(v) |
| } |
| |
| // parse a quoted-string |
| buffer := new(strings.Builder) |
| for i := 1; i < len(v); i++ { |
| r := v[i] |
| if r == '"' { |
| return buffer.String(), v[i+1:] |
| } |
| // When MSIE sends a full file path (in "intranet mode"), it does not |
| // escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt". |
| // |
| // No known MIME generators emit unnecessary backslash escapes |
| // for simple token characters like numbers and letters. |
| // |
| // If we see an unnecessary backslash escape, assume it is from MSIE |
| // and intended as a literal backslash. This makes Go servers deal better |
| // with MSIE without affecting the way they handle conforming MIME |
| // generators. |
| if r == '\\' && i+1 < len(v) && isTSpecial(rune(v[i+1])) { |
| buffer.WriteByte(v[i+1]) |
| i++ |
| continue |
| } |
| if r == '\r' || r == '\n' { |
| return "", v |
| } |
| buffer.WriteByte(v[i]) |
| } |
| // Did not find end quote. |
| return "", v |
| } |
| |
| func consumeMediaParam(v string) (param, value, rest string) { |
| rest = strings.TrimLeftFunc(v, unicode.IsSpace) |
| if !strings.HasPrefix(rest, ";") { |
| return "", "", v |
| } |
| |
| rest = rest[1:] // consume semicolon |
| rest = strings.TrimLeftFunc(rest, unicode.IsSpace) |
| param, rest = consumeToken(rest) |
| param = strings.ToLower(param) |
| if param == "" { |
| return "", "", v |
| } |
| |
| rest = strings.TrimLeftFunc(rest, unicode.IsSpace) |
| if !strings.HasPrefix(rest, "=") { |
| return "", "", v |
| } |
| rest = rest[1:] // consume equals sign |
| rest = strings.TrimLeftFunc(rest, unicode.IsSpace) |
| value, rest2 := consumeValue(rest) |
| if value == "" && rest2 == rest { |
| return "", "", v |
| } |
| rest = rest2 |
| return param, value, rest |
| } |
| |
| func percentHexUnescape(s string) (string, error) { |
| // Count %, check that they're well-formed. |
| percents := 0 |
| for i := 0; i < len(s); { |
| if s[i] != '%' { |
| i++ |
| continue |
| } |
| percents++ |
| if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { |
| s = s[i:] |
| if len(s) > 3 { |
| s = s[0:3] |
| } |
| return "", fmt.Errorf("mime: bogus characters after %%: %q", s) |
| } |
| i += 3 |
| } |
| if percents == 0 { |
| return s, nil |
| } |
| |
| t := make([]byte, len(s)-2*percents) |
| j := 0 |
| for i := 0; i < len(s); { |
| switch s[i] { |
| case '%': |
| t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) |
| j++ |
| i += 3 |
| default: |
| t[j] = s[i] |
| j++ |
| i++ |
| } |
| } |
| return string(t), nil |
| } |
| |
| func ishex(c byte) bool { |
| switch { |
| case '0' <= c && c <= '9': |
| return true |
| case 'a' <= c && c <= 'f': |
| return true |
| case 'A' <= c && c <= 'F': |
| return true |
| } |
| return false |
| } |
| |
| func unhex(c byte) byte { |
| switch { |
| case '0' <= c && c <= '9': |
| return c - '0' |
| case 'a' <= c && c <= 'f': |
| return c - 'a' + 10 |
| case 'A' <= c && c <= 'F': |
| return c - 'A' + 10 |
| } |
| return 0 |
| } |