gddo-server: do not tee expected 404s

When a request is expected to return a 404 even after godoc.org is
redirected to pkg.go.dev (for example static assets, /-/bot, and
/-/refresh), don't tee that request.

Change-Id: I1640bb2886ccdd55b2790f3b8d58b2d32eca8418
Reviewed-on: https://go-review.googlesource.com/c/gddo/+/274699
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/gddo-server/main.go b/gddo-server/main.go
index 47ca9e0..99e98a7 100644
--- a/gddo-server/main.go
+++ b/gddo-server/main.go
@@ -1046,13 +1046,7 @@
 	if f, ok := w.(http.Flusher); ok {
 		f.Flush()
 	}
-	// Don't tee App Engine requests to pkg.go.dev.
-	if strings.HasPrefix(r.URL.Path, "/_ah/") {
-		return
-	}
-	if err := makePkgGoDevRequest(r, latency, s.isRobot(r), translateStatus(w2.status)); err != nil {
-		log.Printf("makePkgGoDevRequest(%q, %d) error: %v", r.URL, latency, err)
-	}
+	makePkgGoDevRequest(r, latency, s.isRobot(r), translateStatus(w2.status))
 }
 
 func (s *server) logRequestStart(req *http.Request) {
diff --git a/gddo-server/pkgsite.go b/gddo-server/pkgsite.go
index f7bb697..be1d3b8 100644
--- a/gddo-server/pkgsite.go
+++ b/gddo-server/pkgsite.go
@@ -13,23 +13,68 @@
 	"log"
 	"net/http"
 	"net/url"
+	"path/filepath"
 	"strings"
 	"time"
 )
 
-func makePkgGoDevRequest(r *http.Request, latency time.Duration, isRobot bool, status int) error {
+// makePkgGoDevRequest makes a request to the teeproxy with data about the
+// godoc.org request.
+func makePkgGoDevRequest(r *http.Request, latency time.Duration, isRobot bool, status int) {
+	var msg string
+	defer func() {
+		log.Printf("makePkgGoDevRequest(%q): %s", r.URL.Path, msg)
+	}()
+
+	if !shouldTeeRequest(r.URL.Path) {
+		msg = "not teeing request"
+		return
+	}
 	event := newGDDOEvent(r, latency, isRobot, status)
 	b, err := json.Marshal(event)
 	if err != nil {
-		return fmt.Errorf("json.Marshal(%v): %v", event, err)
+		msg = fmt.Sprintf("json.Marshal(%v): %v", event, err)
+		return
 	}
 
 	teeproxyURL := url.URL{Scheme: "https", Host: teeproxyHost}
 	if _, err := http.Post(teeproxyURL.String(), jsonMIMEType, bytes.NewReader(b)); err != nil {
-		return fmt.Errorf("http.Post(%q, %q, %v): %v", teeproxyURL.String(), jsonMIMEType, event, err)
+		msg = fmt.Sprintf("http.Post(%q, %q, %v): %v", teeproxyURL.String(), jsonMIMEType, event, err)
+		return
 	}
-	log.Printf("makePkgGoDevRequest: request made to %q for %+v", teeproxyURL.String(), event)
-	return nil
+	msg = fmt.Sprintf("request made to %q for %+v", teeproxyURL.String(), event)
+}
+
+// doNotTeeURLsToPkgGoDev are paths that should not be teed to pkg.go.dev.
+var doNotTeeURLsToPkgGoDev = map[string]bool{
+	"/-/bot":     true,
+	"/-/refresh": true,
+}
+
+// doNotTeeExtsToPkgGoDev are URL extensions that should not be teed to
+// pkg.go.dev.
+var doNotTeeExtsToPkgGoDev = map[string]bool{
+	".css":  true,
+	".html": true,
+	".js":   true,
+	".txt":  true,
+	".xml":  true,
+}
+
+// shouldTeeRequest reports whether a request should be teed to pkg.go.dev.
+func shouldTeeRequest(u string) bool {
+	// Don't tee App Engine requests to pkg.go.dev.
+	if strings.HasPrefix(u, "/_ah/") {
+		return false
+	}
+	ext := filepath.Ext(u)
+	if doNotTeeExtsToPkgGoDev[ext] {
+		return false
+	}
+	if doNotTeeURLsToPkgGoDev[u] {
+		return false
+	}
+	return true
 }
 
 type gddoEvent struct {
diff --git a/gddo-server/pkgsite_test.go b/gddo-server/pkgsite_test.go
index 29ea55a..a4593ee 100644
--- a/gddo-server/pkgsite_test.go
+++ b/gddo-server/pkgsite_test.go
@@ -113,7 +113,7 @@
 		},
 		{
 			from: "https://godoc.org/-/subrepo",
-			to:   "https://pkg.go.dev/search?q=golang.org/x&utm_source=godoc",
+			to:   "https://pkg.go.dev/search?q=golang.org%2Fx&utm_source=godoc",
 		},
 		{
 			from: "https://godoc.org/?q=foo",
@@ -433,3 +433,33 @@
 		})
 	}
 }
+
+func TestShouldTeeRequest(t *testing.T) {
+	for _, test := range []struct {
+		urlPath string
+		want    bool
+	}{
+		{"/", true},
+		{"/-/about", true},
+		{"/net/http", true},
+		{"/_ah/ready", false},
+		{"/_ah/warmup", false},
+		{"/-/bootstrap.min.css", false},
+		{"/-/bootstrap.min.js", false},
+		{"/-/bot", false},
+		{"/-/jquery-2.0.3.min.js", false},
+		{"/-/refresh", false},
+		{"/-/sidebar.css", false},
+		{"/-/site.css", false},
+		{"/-/site.js", false},
+		{"/BingSiteAuth.xml", false},
+		{"/google3d2f3cd4cc2bb44b.html", false},
+		{"/humans.txt", false},
+		{"/robots.txt", false},
+		{"/third_party/jquery.timeago.js", false},
+	} {
+		if got := shouldTeeRequest(test.urlPath); got != test.want {
+			t.Errorf("shouldTeeRequest(%q): %t; want %t", test.urlPath, got, test.want)
+		}
+	}
+}