[release-branch.go1.15-security] net/http/cgi,net/http/fcgi: add Content-Type detection

This CL ensures that responses served via CGI and FastCGI
have a Content-Type header based on the content of the
response if not explicitly set by handlers.

If the implementers of the handler did not explicitly
specify a Content-Type both CGI implementations would default
to "text/html", potentially causing cross-site scripting.

Thanks to RedTeam Pentesting GmbH for reporting this.

Fixes CVE-2020-24553

Change-Id: I82cfc396309b5ab2e8d6e9a87eda8ea7e3799473
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/823217
Reviewed-by: Russ Cox <rsc@google.com>
(cherry picked from commit 23d675d07fdc56aafd67c0a0b63d5b7e14708ff0)
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/835311
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
diff --git a/src/net/http/cgi/child.go b/src/net/http/cgi/child.go
index 9474175..61de616 100644
--- a/src/net/http/cgi/child.go
+++ b/src/net/http/cgi/child.go
@@ -163,10 +163,12 @@
 }
 
 type response struct {
-	req        *http.Request
-	header     http.Header
-	bufw       *bufio.Writer
-	headerSent bool
+	req            *http.Request
+	header         http.Header
+	code           int
+	wroteHeader    bool
+	wroteCGIHeader bool
+	bufw           *bufio.Writer
 }
 
 func (r *response) Flush() {
@@ -178,26 +180,38 @@
 }
 
 func (r *response) Write(p []byte) (n int, err error) {
-	if !r.headerSent {
+	if !r.wroteHeader {
 		r.WriteHeader(http.StatusOK)
 	}
+	if !r.wroteCGIHeader {
+		r.writeCGIHeader(p)
+	}
 	return r.bufw.Write(p)
 }
 
 func (r *response) WriteHeader(code int) {
-	if r.headerSent {
+	if r.wroteHeader {
 		// Note: explicitly using Stderr, as Stdout is our HTTP output.
 		fmt.Fprintf(os.Stderr, "CGI attempted to write header twice on request for %s", r.req.URL)
 		return
 	}
-	r.headerSent = true
-	fmt.Fprintf(r.bufw, "Status: %d %s\r\n", code, http.StatusText(code))
+	r.wroteHeader = true
+	r.code = code
+}
 
-	// Set a default Content-Type
-	if _, hasType := r.header["Content-Type"]; !hasType {
-		r.header.Add("Content-Type", "text/html; charset=utf-8")
+// writeCGIHeader finalizes the header sent to the client and writes it to the output.
+// p is not written by writeHeader, but is the first chunk of the body
+// that will be written. It is sniffed for a Content-Type if none is
+// set explicitly.
+func (r *response) writeCGIHeader(p []byte) {
+	if r.wroteCGIHeader {
+		return
 	}
-
+	r.wroteCGIHeader = true
+	fmt.Fprintf(r.bufw, "Status: %d %s\r\n", r.code, http.StatusText(r.code))
+	if _, hasType := r.header["Content-Type"]; !hasType {
+		r.header.Set("Content-Type", http.DetectContentType(p))
+	}
 	r.header.Write(r.bufw)
 	r.bufw.WriteString("\r\n")
 	r.bufw.Flush()
diff --git a/src/net/http/cgi/child_test.go b/src/net/http/cgi/child_test.go
index 14e0af4..f6ecb6e 100644
--- a/src/net/http/cgi/child_test.go
+++ b/src/net/http/cgi/child_test.go
@@ -7,6 +7,11 @@
 package cgi
 
 import (
+	"bufio"
+	"bytes"
+	"net/http"
+	"net/http/httptest"
+	"strings"
 	"testing"
 )
 
@@ -148,3 +153,67 @@
 		t.Errorf("RemoteAddr: got %q; want %q", g, e)
 	}
 }
+
+type countingWriter int
+
+func (c *countingWriter) Write(p []byte) (int, error) {
+	*c += countingWriter(len(p))
+	return len(p), nil
+}
+func (c *countingWriter) WriteString(p string) (int, error) {
+	*c += countingWriter(len(p))
+	return len(p), nil
+}
+
+func TestResponse(t *testing.T) {
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			var buf bytes.Buffer
+			resp := response{
+				req:    httptest.NewRequest("GET", "/", nil),
+				header: http.Header{},
+				bufw:   bufio.NewWriter(&buf),
+			}
+			n, err := resp.Write([]byte(tt.body))
+			if err != nil {
+				t.Errorf("Write: unexpected %v", err)
+			}
+			if want := len(tt.body); n != want {
+				t.Errorf("reported short Write: got %v want %v", n, want)
+			}
+			resp.writeCGIHeader(nil)
+			resp.Flush()
+			if got := resp.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("wrong content-type: got %q, want %q", got, tt.wantCT)
+			}
+			if !bytes.HasSuffix(buf.Bytes(), []byte(tt.body)) {
+				t.Errorf("body was not correctly written")
+			}
+		})
+	}
+}
diff --git a/src/net/http/cgi/integration_test.go b/src/net/http/cgi/integration_test.go
index 32d59c0..295c3b8 100644
--- a/src/net/http/cgi/integration_test.go
+++ b/src/net/http/cgi/integration_test.go
@@ -16,7 +16,9 @@
 	"io"
 	"net/http"
 	"net/http/httptest"
+	"net/url"
 	"os"
+	"strings"
 	"testing"
 	"time"
 )
@@ -52,7 +54,7 @@
 	}
 	replay := runCgiTest(t, h, "GET /test.go?foo=bar&a=b HTTP/1.0\nHost: example.com\n\n", expectedMap)
 
-	if expected, got := "text/html; charset=utf-8", replay.Header().Get("Content-Type"); got != expected {
+	if expected, got := "text/plain; charset=utf-8", replay.Header().Get("Content-Type"); got != expected {
 		t.Errorf("got a Content-Type of %q; expected %q", got, expected)
 	}
 	if expected, got := "X-Test-Value", replay.Header().Get("X-Test-Header"); got != expected {
@@ -152,6 +154,51 @@
 	}
 }
 
+func TestChildContentType(t *testing.T) {
+	testenv.MustHaveExec(t)
+
+	h := &Handler{
+		Path: os.Args[0],
+		Root: "/test.go",
+		Args: []string{"-test.run=TestBeChildCGIProcess"},
+	}
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			expectedMap := map[string]string{"_body": tt.body}
+			req := fmt.Sprintf("GET /test.go?exact-body=%s HTTP/1.0\nHost: example.com\n\n", url.QueryEscape(tt.body))
+			replay := runCgiTest(t, h, req, expectedMap)
+			if got := replay.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("got a Content-Type of %q; expected it to start with %q", got, tt.wantCT)
+			}
+		})
+	}
+}
+
 // golang.org/issue/7198
 func Test500WithNoHeaders(t *testing.T)     { want500Test(t, "/immediate-disconnect") }
 func Test500WithNoContentType(t *testing.T) { want500Test(t, "/no-content-type") }
@@ -203,6 +250,10 @@
 		if req.FormValue("no-body") == "1" {
 			return
 		}
+		if eb, ok := req.Form["exact-body"]; ok {
+			io.WriteString(rw, eb[0])
+			return
+		}
 		if req.FormValue("write-forever") == "1" {
 			io.Copy(rw, neverEnding('a'))
 			for {
diff --git a/src/net/http/fcgi/child.go b/src/net/http/fcgi/child.go
index 30a6b2c..a31273b 100644
--- a/src/net/http/fcgi/child.go
+++ b/src/net/http/fcgi/child.go
@@ -74,10 +74,12 @@
 
 // response implements http.ResponseWriter.
 type response struct {
-	req         *request
-	header      http.Header
-	w           *bufWriter
-	wroteHeader bool
+	req            *request
+	header         http.Header
+	code           int
+	wroteHeader    bool
+	wroteCGIHeader bool
+	w              *bufWriter
 }
 
 func newResponse(c *child, req *request) *response {
@@ -92,11 +94,14 @@
 	return r.header
 }
 
-func (r *response) Write(data []byte) (int, error) {
+func (r *response) Write(p []byte) (n int, err error) {
 	if !r.wroteHeader {
 		r.WriteHeader(http.StatusOK)
 	}
-	return r.w.Write(data)
+	if !r.wroteCGIHeader {
+		r.writeCGIHeader(p)
+	}
+	return r.w.Write(p)
 }
 
 func (r *response) WriteHeader(code int) {
@@ -104,22 +109,34 @@
 		return
 	}
 	r.wroteHeader = true
+	r.code = code
 	if code == http.StatusNotModified {
 		// Must not have body.
 		r.header.Del("Content-Type")
 		r.header.Del("Content-Length")
 		r.header.Del("Transfer-Encoding")
-	} else if r.header.Get("Content-Type") == "" {
-		r.header.Set("Content-Type", "text/html; charset=utf-8")
 	}
-
 	if r.header.Get("Date") == "" {
 		r.header.Set("Date", time.Now().UTC().Format(http.TimeFormat))
 	}
+}
 
-	fmt.Fprintf(r.w, "Status: %d %s\r\n", code, http.StatusText(code))
+// writeCGIHeader finalizes the header sent to the client and writes it to the output.
+// p is not written by writeHeader, but is the first chunk of the body
+// that will be written. It is sniffed for a Content-Type if none is
+// set explicitly.
+func (r *response) writeCGIHeader(p []byte) {
+	if r.wroteCGIHeader {
+		return
+	}
+	r.wroteCGIHeader = true
+	fmt.Fprintf(r.w, "Status: %d %s\r\n", r.code, http.StatusText(r.code))
+	if _, hasType := r.header["Content-Type"]; r.code != http.StatusNotModified && !hasType {
+		r.header.Set("Content-Type", http.DetectContentType(p))
+	}
 	r.header.Write(r.w)
 	r.w.WriteString("\r\n")
+	r.w.Flush()
 }
 
 func (r *response) Flush() {
@@ -290,6 +307,8 @@
 		httpReq = httpReq.WithContext(envVarCtx)
 		c.handler.ServeHTTP(r, httpReq)
 	}
+	// Make sure we serve something even if nothing was written to r
+	r.Write(nil)
 	r.Close()
 	c.mu.Lock()
 	delete(c.requests, req.reqId)
diff --git a/src/net/http/fcgi/fcgi_test.go b/src/net/http/fcgi/fcgi_test.go
index e9d2b34..4a27a12 100644
--- a/src/net/http/fcgi/fcgi_test.go
+++ b/src/net/http/fcgi/fcgi_test.go
@@ -10,6 +10,7 @@
 	"io"
 	"io/ioutil"
 	"net/http"
+	"strings"
 	"testing"
 )
 
@@ -344,3 +345,54 @@
 		<-done
 	}
 }
+
+func TestResponseWriterSniffsContentType(t *testing.T) {
+	var tests = []struct {
+		name   string
+		body   string
+		wantCT string
+	}{
+		{
+			name:   "no body",
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "html",
+			body:   "<html><head><title>test page</title></head><body>This is a body</body></html>",
+			wantCT: "text/html; charset=utf-8",
+		},
+		{
+			name:   "text",
+			body:   strings.Repeat("gopher", 86),
+			wantCT: "text/plain; charset=utf-8",
+		},
+		{
+			name:   "jpg",
+			body:   "\xFF\xD8\xFF" + strings.Repeat("B", 1024),
+			wantCT: "image/jpeg",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			input := make([]byte, len(streamFullRequestStdin))
+			copy(input, streamFullRequestStdin)
+			rc := nopWriteCloser{bytes.NewBuffer(input)}
+			done := make(chan bool)
+			var resp *response
+			c := newChild(rc, http.HandlerFunc(func(
+				w http.ResponseWriter,
+				r *http.Request,
+			) {
+				io.WriteString(w, tt.body)
+				resp = w.(*response)
+				done <- true
+			}))
+			defer c.cleanUp()
+			go c.serve()
+			<-done
+			if got := resp.Header().Get("Content-Type"); got != tt.wantCT {
+				t.Errorf("got a Content-Type of %q; expected it to start with %q", got, tt.wantCT)
+			}
+		})
+	}
+}