|  | // Copyright 2010 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  | // | 
|  |  | 
|  | /* | 
|  | Package multipart implements MIME multipart parsing, as defined in RFC | 
|  | 2046. | 
|  |  | 
|  | The implementation is sufficient for HTTP (RFC 2388) and the multipart | 
|  | bodies generated by popular browsers. | 
|  | */ | 
|  | package multipart | 
|  |  | 
|  | import ( | 
|  | "bufio" | 
|  | "bytes" | 
|  | "fmt" | 
|  | "io" | 
|  | "io/ioutil" | 
|  | "mime" | 
|  | "mime/quotedprintable" | 
|  | "net/textproto" | 
|  | ) | 
|  |  | 
// emptyParams is the shared placeholder map stored in Part.dispositionParams
// when the Content-Disposition header cannot be parsed. Code in this file
// only ever reads from it.
var emptyParams = map[string]string{}

// peekBufferSize is the size of the bufio.Reader that NewReader wraps around
// the input.
//
// It must be at least 76 for this package to work correctly: the sequence
// "\r\n--" + a 70-byte separator + "-" would otherwise fill the whole buffer,
// and it would not be safe to consume even a single byte from it.
const peekBufferSize = 4096
|  |  | 
|  | // A Part represents a single part in a multipart body. | 
|  | type Part struct { | 
|  | // The headers of the body, if any, with the keys canonicalized | 
|  | // in the same fashion that the Go http.Request headers are. | 
|  | // For example, "foo-bar" changes case to "Foo-Bar" | 
|  | // | 
|  | // As a special case, if the "Content-Transfer-Encoding" header | 
|  | // has a value of "quoted-printable", that header is instead | 
|  | // hidden from this map and the body is transparently decoded | 
|  | // during Read calls. | 
|  | Header textproto.MIMEHeader | 
|  |  | 
|  | mr *Reader | 
|  |  | 
|  | disposition       string | 
|  | dispositionParams map[string]string | 
|  |  | 
|  | // r is either a reader directly reading from mr, or it's a | 
|  | // wrapper around such a reader, decoding the | 
|  | // Content-Transfer-Encoding | 
|  | r io.Reader | 
|  |  | 
|  | n       int   // known data bytes waiting in mr.bufReader | 
|  | total   int64 // total data bytes read already | 
|  | err     error // error to return when n == 0 | 
|  | readErr error // read error observed from mr.bufReader | 
|  | } | 
|  |  | 
|  | // FormName returns the name parameter if p has a Content-Disposition | 
|  | // of type "form-data".  Otherwise it returns the empty string. | 
|  | func (p *Part) FormName() string { | 
|  | // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF | 
|  | // of Content-Disposition value format. | 
|  | if p.dispositionParams == nil { | 
|  | p.parseContentDisposition() | 
|  | } | 
|  | if p.disposition != "form-data" { | 
|  | return "" | 
|  | } | 
|  | return p.dispositionParams["name"] | 
|  | } | 
|  |  | 
|  | // FileName returns the filename parameter of the Part's | 
|  | // Content-Disposition header. | 
|  | func (p *Part) FileName() string { | 
|  | if p.dispositionParams == nil { | 
|  | p.parseContentDisposition() | 
|  | } | 
|  | return p.dispositionParams["filename"] | 
|  | } | 
|  |  | 
|  | func (p *Part) parseContentDisposition() { | 
|  | v := p.Header.Get("Content-Disposition") | 
|  | var err error | 
|  | p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) | 
|  | if err != nil { | 
|  | p.dispositionParams = emptyParams | 
|  | } | 
|  | } | 
|  |  | 
|  | // NewReader creates a new multipart Reader reading from r using the | 
|  | // given MIME boundary. | 
|  | // | 
|  | // The boundary is usually obtained from the "boundary" parameter of | 
|  | // the message's "Content-Type" header. Use mime.ParseMediaType to | 
|  | // parse such headers. | 
|  | func NewReader(r io.Reader, boundary string) *Reader { | 
|  | b := []byte("\r\n--" + boundary + "--") | 
|  | return &Reader{ | 
|  | bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), | 
|  | nl:               b[:2], | 
|  | nlDashBoundary:   b[:len(b)-2], | 
|  | dashBoundaryDash: b[2:], | 
|  | dashBoundary:     b[2 : len(b)-2], | 
|  | } | 
|  | } | 
|  |  | 
// stickyErrorReader is an io.Reader that stops calling Read on its
// underlying Reader after that Reader has returned an error. The io.Reader
// contract promises nothing about Read calls made after an error, yet this
// package does issue multiple Reads after one, so the first error is
// remembered and replayed instead.
type stickyErrorReader struct {
	r   io.Reader // underlying source
	err error     // first error returned by r, if any
}

// Read forwards to the underlying reader until it reports an error. From
// then on it returns 0 bytes and that same error without touching the
// underlying reader again.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err == nil {
		n, r.err = r.r.Read(p)
	}
	return n, r.err
}
|  |  | 
|  | func newPart(mr *Reader) (*Part, error) { | 
|  | bp := &Part{ | 
|  | Header: make(map[string][]string), | 
|  | mr:     mr, | 
|  | } | 
|  | if err := bp.populateHeaders(); err != nil { | 
|  | return nil, err | 
|  | } | 
|  | bp.r = partReader{bp} | 
|  | const cte = "Content-Transfer-Encoding" | 
|  | if bp.Header.Get(cte) == "quoted-printable" { | 
|  | bp.Header.Del(cte) | 
|  | bp.r = quotedprintable.NewReader(bp.r) | 
|  | } | 
|  | return bp, nil | 
|  | } | 
|  |  | 
|  | func (bp *Part) populateHeaders() error { | 
|  | r := textproto.NewReader(bp.mr.bufReader) | 
|  | header, err := r.ReadMIMEHeader() | 
|  | if err == nil { | 
|  | bp.Header = header | 
|  | } | 
|  | return err | 
|  | } | 
|  |  | 
|  | // Read reads the body of a part, after its headers and before the | 
|  | // next part (if any) begins. | 
|  | func (p *Part) Read(d []byte) (n int, err error) { | 
|  | return p.r.Read(d) | 
|  | } | 
|  |  | 
|  | // partReader implements io.Reader by reading raw bytes directly from the | 
|  | // wrapped *Part, without doing any Transfer-Encoding decoding. | 
|  | type partReader struct { | 
|  | p *Part | 
|  | } | 
|  |  | 
|  | func (pr partReader) Read(d []byte) (int, error) { | 
|  | p := pr.p | 
|  | br := p.mr.bufReader | 
|  |  | 
|  | // Read into buffer until we identify some data to return, | 
|  | // or we find a reason to stop (boundary or read error). | 
|  | for p.n == 0 && p.err == nil { | 
|  | peek, _ := br.Peek(br.Buffered()) | 
|  | p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) | 
|  | if p.n == 0 && p.err == nil { | 
|  | // Force buffered I/O to read more into buffer. | 
|  | _, p.readErr = br.Peek(len(peek) + 1) | 
|  | if p.readErr == io.EOF { | 
|  | p.readErr = io.ErrUnexpectedEOF | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // Read out from "data to return" part of buffer. | 
|  | if p.n == 0 { | 
|  | return 0, p.err | 
|  | } | 
|  | n := len(d) | 
|  | if n > p.n { | 
|  | n = p.n | 
|  | } | 
|  | n, _ = br.Read(d[:n]) | 
|  | p.total += int64(n) | 
|  | p.n -= n | 
|  | if p.n == 0 { | 
|  | return n, p.err | 
|  | } | 
|  | return n, nil | 
|  | } | 
|  |  | 
|  | // scanUntilBoundary scans buf to identify how much of it can be safely | 
|  | // returned as part of the Part body. | 
|  | // dashBoundary is "--boundary". | 
|  | // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. | 
|  | // The comments below (and the name) assume "\n--boundary", but either is accepted. | 
|  | // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. | 
|  | // readErr is the read error, if any, that followed reading the bytes in buf. | 
|  | // scanUntilBoundary returns the number of data bytes from buf that can be | 
|  | // returned as part of the Part body and also the error to return (if any) | 
|  | // once those data bytes are done. | 
|  | func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { | 
|  | if total == 0 { | 
|  | // At beginning of body, allow dashBoundary. | 
|  | if bytes.HasPrefix(buf, dashBoundary) { | 
|  | switch matchAfterPrefix(buf, dashBoundary, readErr) { | 
|  | case -1: | 
|  | return len(dashBoundary), nil | 
|  | case 0: | 
|  | return 0, nil | 
|  | case +1: | 
|  | return 0, io.EOF | 
|  | } | 
|  | } | 
|  | if bytes.HasPrefix(dashBoundary, buf) { | 
|  | return 0, readErr | 
|  | } | 
|  | } | 
|  |  | 
|  | // Search for "\n--boundary". | 
|  | if i := bytes.Index(buf, nlDashBoundary); i >= 0 { | 
|  | switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { | 
|  | case -1: | 
|  | return i + len(nlDashBoundary), nil | 
|  | case 0: | 
|  | return i, nil | 
|  | case +1: | 
|  | return i, io.EOF | 
|  | } | 
|  | } | 
|  | if bytes.HasPrefix(nlDashBoundary, buf) { | 
|  | return 0, readErr | 
|  | } | 
|  |  | 
|  | // Otherwise, anything up to the final \n is not part of the boundary | 
|  | // and so must be part of the body. | 
|  | // Also if the section from the final \n onward is not a prefix of the boundary, | 
|  | // it too must be part of the body. | 
|  | i := bytes.LastIndexByte(buf, nlDashBoundary[0]) | 
|  | if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { | 
|  | return i, nil | 
|  | } | 
|  | return len(buf), readErr | 
|  | } | 
|  |  | 
// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a double dash, space, tab, cr, nl,
// or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" and "--foo-bar" do not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that readErr == nil and buf ends either right after the prefix
// or right after a single dash following the prefix.
//
// A lone dash is not accepted as a match because RFC 2046 defines the
// closing delimiter as the boundary followed by two dashes; body data that
// merely continues with "-" after something that looks like the boundary
// must not terminate the part.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	if len(buf) == len(prefix) {
		if readErr != nil {
			// End of input directly after the prefix counts as a match.
			return +1
		}
		return 0
	}

	switch c := buf[len(prefix)]; c {
	case ' ', '\t', '\r', '\n':
		return +1
	case '-':
		// Only "--" (the start of a closing delimiter) is a match.
		if len(buf) == len(prefix)+1 {
			if readErr != nil {
				// Input ended after a single dash: prefix + "-" is
				// not a boundary.
				return -1
			}
			// Need one more byte to tell "-" from "--".
			return 0
		}
		if buf[len(prefix)+1] == '-' {
			return +1
		}
	}
	return -1
}
|  |  | 
|  | func (p *Part) Close() error { | 
|  | io.Copy(ioutil.Discard, p) | 
|  | return nil | 
|  | } | 
|  |  | 
|  | // Reader is an iterator over parts in a MIME multipart body. | 
|  | // Reader's underlying parser consumes its input as needed. Seeking | 
|  | // isn't supported. | 
|  | type Reader struct { | 
|  | bufReader *bufio.Reader | 
|  |  | 
|  | currentPart *Part | 
|  | partsRead   int | 
|  |  | 
|  | nl               []byte // "\r\n" or "\n" (set after seeing first boundary line) | 
|  | nlDashBoundary   []byte // nl + "--boundary" | 
|  | dashBoundaryDash []byte // "--boundary--" | 
|  | dashBoundary     []byte // "--boundary" | 
|  | } | 
|  |  | 
|  | // NextPart returns the next part in the multipart or an error. | 
|  | // When there are no more parts, the error io.EOF is returned. | 
|  | func (r *Reader) NextPart() (*Part, error) { | 
|  | if r.currentPart != nil { | 
|  | r.currentPart.Close() | 
|  | } | 
|  |  | 
|  | expectNewPart := false | 
|  | for { | 
|  | line, err := r.bufReader.ReadSlice('\n') | 
|  |  | 
|  | if err == io.EOF && r.isFinalBoundary(line) { | 
|  | // If the buffer ends in "--boundary--" without the | 
|  | // trailing "\r\n", ReadSlice will return an error | 
|  | // (since it's missing the '\n'), but this is a valid | 
|  | // multipart EOF so we need to return io.EOF instead of | 
|  | // a fmt-wrapped one. | 
|  | return nil, io.EOF | 
|  | } | 
|  | if err != nil { | 
|  | return nil, fmt.Errorf("multipart: NextPart: %v", err) | 
|  | } | 
|  |  | 
|  | if r.isBoundaryDelimiterLine(line) { | 
|  | r.partsRead++ | 
|  | bp, err := newPart(r) | 
|  | if err != nil { | 
|  | return nil, err | 
|  | } | 
|  | r.currentPart = bp | 
|  | return bp, nil | 
|  | } | 
|  |  | 
|  | if r.isFinalBoundary(line) { | 
|  | // Expected EOF | 
|  | return nil, io.EOF | 
|  | } | 
|  |  | 
|  | if expectNewPart { | 
|  | return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) | 
|  | } | 
|  |  | 
|  | if r.partsRead == 0 { | 
|  | // skip line | 
|  | continue | 
|  | } | 
|  |  | 
|  | // Consume the "\n" or "\r\n" separator between the | 
|  | // body of the previous part and the boundary line we | 
|  | // now expect will follow. (either a new part or the | 
|  | // end boundary) | 
|  | if bytes.Equal(line, r.nl) { | 
|  | expectNewPart = true | 
|  | continue | 
|  | } | 
|  |  | 
|  | return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) | 
|  | } | 
|  | } | 
|  |  | 
|  | // isFinalBoundary reports whether line is the final boundary line | 
|  | // indicating that all parts are over. | 
|  | // It matches `^--boundary--[ \t]*(\r\n)?$` | 
|  | func (mr *Reader) isFinalBoundary(line []byte) bool { | 
|  | if !bytes.HasPrefix(line, mr.dashBoundaryDash) { | 
|  | return false | 
|  | } | 
|  | rest := line[len(mr.dashBoundaryDash):] | 
|  | rest = skipLWSPChar(rest) | 
|  | return len(rest) == 0 || bytes.Equal(rest, mr.nl) | 
|  | } | 
|  |  | 
|  | func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { | 
|  | // http://tools.ietf.org/html/rfc2046#section-5.1 | 
|  | //   The boundary delimiter line is then defined as a line | 
|  | //   consisting entirely of two hyphen characters ("-", | 
|  | //   decimal value 45) followed by the boundary parameter | 
|  | //   value from the Content-Type header field, optional linear | 
|  | //   whitespace, and a terminating CRLF. | 
|  | if !bytes.HasPrefix(line, mr.dashBoundary) { | 
|  | return false | 
|  | } | 
|  | rest := line[len(mr.dashBoundary):] | 
|  | rest = skipLWSPChar(rest) | 
|  |  | 
|  | // On the first part, see our lines are ending in \n instead of \r\n | 
|  | // and switch into that mode if so. This is a violation of the spec, | 
|  | // but occurs in practice. | 
|  | if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { | 
|  | mr.nl = mr.nl[1:] | 
|  | mr.nlDashBoundary = mr.nlDashBoundary[1:] | 
|  | } | 
|  | return bytes.Equal(rest, mr.nl) | 
|  | } | 
|  |  | 
// skipLWSPChar returns b with leading spaces and tabs removed. The result
// is a sub-slice of b; no bytes are copied.
// RFC 822 defines:
//
//	LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	start := 0
	for start < len(b) && (b[start] == ' ' || b[start] == '\t') {
		start++
	}
	return b[start:]
}