| // Copyright 2010 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| // |
| |
| /* |
| Package multipart implements MIME multipart parsing, as defined in RFC |
| 2046. |
| |
| The implementation is sufficient for HTTP (RFC 2388) and the multipart |
| bodies generated by popular browsers. |
| */ |
| package multipart |
| |
| import ( |
| "bufio" |
| "bytes" |
| "fmt" |
| "io" |
| "mime" |
| "mime/quotedprintable" |
| "net/textproto" |
| "path/filepath" |
| "strings" |
| ) |
| |
// emptyParams is the parameter map stored on a Part whose
// Content-Disposition header could not be parsed. Being non-nil, it
// marks the header as already examined.
var emptyParams = map[string]string{}

// peekBufferSize is the size of the buffered reader created by NewReader.
// It must be at least 76 for this package to work correctly:
// "\r\n--separator_of_len_70-" would otherwise fill the buffer, and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096
| |
| // A Part represents a single part in a multipart body. |
| type Part struct { |
| // The headers of the body, if any, with the keys canonicalized |
| // in the same fashion that the Go http.Request headers are. |
| // For example, "foo-bar" changes case to "Foo-Bar" |
| Header textproto.MIMEHeader |
| |
| mr *Reader |
| |
| disposition string |
| dispositionParams map[string]string |
| |
| // r is either a reader directly reading from mr, or it's a |
| // wrapper around such a reader, decoding the |
| // Content-Transfer-Encoding |
| r io.Reader |
| |
| n int // known data bytes waiting in mr.bufReader |
| total int64 // total data bytes read already |
| err error // error to return when n == 0 |
| readErr error // read error observed from mr.bufReader |
| } |
| |
| // FormName returns the name parameter if p has a Content-Disposition |
| // of type "form-data". Otherwise it returns the empty string. |
| func (p *Part) FormName() string { |
| // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF |
| // of Content-Disposition value format. |
| if p.dispositionParams == nil { |
| p.parseContentDisposition() |
| } |
| if p.disposition != "form-data" { |
| return "" |
| } |
| return p.dispositionParams["name"] |
| } |
| |
| // FileName returns the filename parameter of the Part's Content-Disposition |
| // header. If not empty, the filename is passed through filepath.Base (which is |
| // platform dependent) before being returned. |
| func (p *Part) FileName() string { |
| if p.dispositionParams == nil { |
| p.parseContentDisposition() |
| } |
| filename := p.dispositionParams["filename"] |
| if filename == "" { |
| return "" |
| } |
| // RFC 7578, Section 4.2 requires that if a filename is provided, the |
| // directory path information must not be used. |
| return filepath.Base(filename) |
| } |
| |
| func (p *Part) parseContentDisposition() { |
| v := p.Header.Get("Content-Disposition") |
| var err error |
| p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) |
| if err != nil { |
| p.dispositionParams = emptyParams |
| } |
| } |
| |
| // NewReader creates a new multipart Reader reading from r using the |
| // given MIME boundary. |
| // |
| // The boundary is usually obtained from the "boundary" parameter of |
| // the message's "Content-Type" header. Use mime.ParseMediaType to |
| // parse such headers. |
| func NewReader(r io.Reader, boundary string) *Reader { |
| b := []byte("\r\n--" + boundary + "--") |
| return &Reader{ |
| bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), |
| nl: b[:2], |
| nlDashBoundary: b[:len(b)-2], |
| dashBoundaryDash: b[2:], |
| dashBoundary: b[2 : len(b)-2], |
| } |
| } |
| |
// stickyErrorReader wraps an io.Reader and stops calling Read on it
// once that reader has returned an error; the same error is reported
// for every later call. The io.Reader contract promises nothing about
// Read calls made after an error, yet this package does issue
// multiple Reads after an error.
type stickyErrorReader struct {
	r   io.Reader
	err error // first error returned by r, if any
}

// Read forwards to the underlying reader until it reports an error.
// From then on it returns 0 and that recorded error without touching
// the underlying reader again.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err == nil {
		n, r.err = r.r.Read(p)
	}
	return n, r.err
}
| |
| func newPart(mr *Reader, rawPart bool) (*Part, error) { |
| bp := &Part{ |
| Header: make(map[string][]string), |
| mr: mr, |
| } |
| if err := bp.populateHeaders(); err != nil { |
| return nil, err |
| } |
| bp.r = partReader{bp} |
| |
| // rawPart is used to switch between Part.NextPart and Part.NextRawPart. |
| if !rawPart { |
| const cte = "Content-Transfer-Encoding" |
| if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { |
| bp.Header.Del(cte) |
| bp.r = quotedprintable.NewReader(bp.r) |
| } |
| } |
| return bp, nil |
| } |
| |
| func (p *Part) populateHeaders() error { |
| r := textproto.NewReader(p.mr.bufReader) |
| header, err := r.ReadMIMEHeader() |
| if err == nil { |
| p.Header = header |
| } |
| return err |
| } |
| |
| // Read reads the body of a part, after its headers and before the |
| // next part (if any) begins. |
| func (p *Part) Read(d []byte) (n int, err error) { |
| return p.r.Read(d) |
| } |
| |
| // partReader implements io.Reader by reading raw bytes directly from the |
| // wrapped *Part, without doing any Transfer-Encoding decoding. |
| type partReader struct { |
| p *Part |
| } |
| |
| func (pr partReader) Read(d []byte) (int, error) { |
| p := pr.p |
| br := p.mr.bufReader |
| |
| // Read into buffer until we identify some data to return, |
| // or we find a reason to stop (boundary or read error). |
| for p.n == 0 && p.err == nil { |
| peek, _ := br.Peek(br.Buffered()) |
| p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) |
| if p.n == 0 && p.err == nil { |
| // Force buffered I/O to read more into buffer. |
| _, p.readErr = br.Peek(len(peek) + 1) |
| if p.readErr == io.EOF { |
| p.readErr = io.ErrUnexpectedEOF |
| } |
| } |
| } |
| |
| // Read out from "data to return" part of buffer. |
| if p.n == 0 { |
| return 0, p.err |
| } |
| n := len(d) |
| if n > p.n { |
| n = p.n |
| } |
| n, _ = br.Read(d[:n]) |
| p.total += int64(n) |
| p.n -= n |
| if p.n == 0 { |
| return n, p.err |
| } |
| return n, nil |
| } |
| |
| // scanUntilBoundary scans buf to identify how much of it can be safely |
| // returned as part of the Part body. |
| // dashBoundary is "--boundary". |
| // nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. |
| // The comments below (and the name) assume "\n--boundary", but either is accepted. |
| // total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. |
| // readErr is the read error, if any, that followed reading the bytes in buf. |
| // scanUntilBoundary returns the number of data bytes from buf that can be |
| // returned as part of the Part body and also the error to return (if any) |
| // once those data bytes are done. |
| func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { |
| if total == 0 { |
| // At beginning of body, allow dashBoundary. |
| if bytes.HasPrefix(buf, dashBoundary) { |
| switch matchAfterPrefix(buf, dashBoundary, readErr) { |
| case -1: |
| return len(dashBoundary), nil |
| case 0: |
| return 0, nil |
| case +1: |
| return 0, io.EOF |
| } |
| } |
| if bytes.HasPrefix(dashBoundary, buf) { |
| return 0, readErr |
| } |
| } |
| |
| // Search for "\n--boundary". |
| if i := bytes.Index(buf, nlDashBoundary); i >= 0 { |
| switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { |
| case -1: |
| return i + len(nlDashBoundary), nil |
| case 0: |
| return i, nil |
| case +1: |
| return i, io.EOF |
| } |
| } |
| if bytes.HasPrefix(nlDashBoundary, buf) { |
| return 0, readErr |
| } |
| |
| // Otherwise, anything up to the final \n is not part of the boundary |
| // and so must be part of the body. |
| // Also if the section from the final \n onward is not a prefix of the boundary, |
| // it too must be part of the body. |
| i := bytes.LastIndexByte(buf, nlDashBoundary[0]) |
| if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { |
| return i, nil |
| } |
| return len(buf), readErr |
| } |
| |
// matchAfterPrefix decides whether buf should be treated as matching
// the boundary. prefix is "--boundary", "\r\n--boundary" or
// "\n--boundary", and the caller has already verified that
// bytes.HasPrefix(buf, prefix) is true.
//
// It returns +1 if buf does match the boundary: the prefix is followed
// by a double dash, space, tab, cr, nl, or end of input.
// It returns -1 if buf definitely does NOT match the boundary: the
// prefix is followed by some other character. For example, "--foobar"
// does not match "--foo".
// It returns 0 if more input needs to be read to make the decision:
// buf ends right after the prefix, or right after the prefix plus a
// single dash, and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	rest := buf[len(prefix):]
	if len(rest) == 0 {
		if readErr != nil {
			// End of input directly after the prefix counts as a match.
			return +1
		}
		return 0
	}

	switch rest[0] {
	case ' ', '\t', '\r', '\n':
		return +1
	case '-':
		// Possibly the start of the closing "--".
		switch {
		case len(rest) > 1:
			if rest[1] == '-' {
				return +1
			}
		case readErr == nil:
			// Only one dash seen so far and more input may follow.
			return 0
		}
		// Prefix + "-" followed by anything else, or by end of input,
		// does not match.
	}
	return -1
}
| |
| func (p *Part) Close() error { |
| io.Copy(io.Discard, p) |
| return nil |
| } |
| |
| // Reader is an iterator over parts in a MIME multipart body. |
| // Reader's underlying parser consumes its input as needed. Seeking |
| // isn't supported. |
| type Reader struct { |
| bufReader *bufio.Reader |
| |
| currentPart *Part |
| partsRead int |
| |
| nl []byte // "\r\n" or "\n" (set after seeing first boundary line) |
| nlDashBoundary []byte // nl + "--boundary" |
| dashBoundaryDash []byte // "--boundary--" |
| dashBoundary []byte // "--boundary" |
| } |
| |
| // NextPart returns the next part in the multipart or an error. |
| // When there are no more parts, the error io.EOF is returned. |
| // |
| // As a special case, if the "Content-Transfer-Encoding" header |
| // has a value of "quoted-printable", that header is instead |
| // hidden and the body is transparently decoded during Read calls. |
| func (r *Reader) NextPart() (*Part, error) { |
| return r.nextPart(false) |
| } |
| |
| // NextRawPart returns the next part in the multipart or an error. |
| // When there are no more parts, the error io.EOF is returned. |
| // |
| // Unlike NextPart, it does not have special handling for |
| // "Content-Transfer-Encoding: quoted-printable". |
| func (r *Reader) NextRawPart() (*Part, error) { |
| return r.nextPart(true) |
| } |
| |
| func (r *Reader) nextPart(rawPart bool) (*Part, error) { |
| if r.currentPart != nil { |
| r.currentPart.Close() |
| } |
| if string(r.dashBoundary) == "--" { |
| return nil, fmt.Errorf("multipart: boundary is empty") |
| } |
| expectNewPart := false |
| for { |
| line, err := r.bufReader.ReadSlice('\n') |
| |
| if err == io.EOF && r.isFinalBoundary(line) { |
| // If the buffer ends in "--boundary--" without the |
| // trailing "\r\n", ReadSlice will return an error |
| // (since it's missing the '\n'), but this is a valid |
| // multipart EOF so we need to return io.EOF instead of |
| // a fmt-wrapped one. |
| return nil, io.EOF |
| } |
| if err != nil { |
| return nil, fmt.Errorf("multipart: NextPart: %v", err) |
| } |
| |
| if r.isBoundaryDelimiterLine(line) { |
| r.partsRead++ |
| bp, err := newPart(r, rawPart) |
| if err != nil { |
| return nil, err |
| } |
| r.currentPart = bp |
| return bp, nil |
| } |
| |
| if r.isFinalBoundary(line) { |
| // Expected EOF |
| return nil, io.EOF |
| } |
| |
| if expectNewPart { |
| return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) |
| } |
| |
| if r.partsRead == 0 { |
| // skip line |
| continue |
| } |
| |
| // Consume the "\n" or "\r\n" separator between the |
| // body of the previous part and the boundary line we |
| // now expect will follow. (either a new part or the |
| // end boundary) |
| if bytes.Equal(line, r.nl) { |
| expectNewPart = true |
| continue |
| } |
| |
| return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) |
| } |
| } |
| |
| // isFinalBoundary reports whether line is the final boundary line |
| // indicating that all parts are over. |
| // It matches `^--boundary--[ \t]*(\r\n)?$` |
| func (r *Reader) isFinalBoundary(line []byte) bool { |
| if !bytes.HasPrefix(line, r.dashBoundaryDash) { |
| return false |
| } |
| rest := line[len(r.dashBoundaryDash):] |
| rest = skipLWSPChar(rest) |
| return len(rest) == 0 || bytes.Equal(rest, r.nl) |
| } |
| |
| func (r *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { |
| // https://tools.ietf.org/html/rfc2046#section-5.1 |
| // The boundary delimiter line is then defined as a line |
| // consisting entirely of two hyphen characters ("-", |
| // decimal value 45) followed by the boundary parameter |
| // value from the Content-Type header field, optional linear |
| // whitespace, and a terminating CRLF. |
| if !bytes.HasPrefix(line, r.dashBoundary) { |
| return false |
| } |
| rest := line[len(r.dashBoundary):] |
| rest = skipLWSPChar(rest) |
| |
| // On the first part, see our lines are ending in \n instead of \r\n |
| // and switch into that mode if so. This is a violation of the spec, |
| // but occurs in practice. |
| if r.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { |
| r.nl = r.nl[1:] |
| r.nlDashBoundary = r.nlDashBoundary[1:] |
| } |
| return bytes.Equal(rest, r.nl) |
| } |
| |
// skipLWSPChar returns b with its leading spaces and tabs removed.
// The result shares b's backing array.
// RFC 822 defines:
//
//	LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	i := 0
	for i < len(b) && (b[i] == ' ' || b[i] == '\t') {
		i++
	}
	return b[i:]
}