devtools/cmd/csphash: check script hashes for CSP

Add a tool that checks the script hashes used
for our Content Security Policy (CSP).

Run the tool from all.bash.

(Also, prettier decided to change CONTRIBUTING.md.)

Fixes b/159711607.

Change-Id: I4498a19143c53fb90bcf0560ccaf529f4fb72a81
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/240138
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 569d03b..e02e912 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -25,10 +25,10 @@
 ## Getting started
 
 1. Complete the steps in the
-[Go Contribution Guide](https://golang.org/doc/contribute.html).
+   [Go Contribution Guide](https://golang.org/doc/contribute.html).
 
 2. Download the source code for x/pkgsite:
-`git clone https://go.googlesource.com/pkgsite`
+   `git clone https://go.googlesource.com/pkgsite`
 
 3. Review the [design document](doc/design.md).
 
@@ -45,10 +45,10 @@
 ## Before sending a CL for review
 
 1. Run `./all.bash` and fix all resulting errors. See
-  [doc/precommit.md](doc/precommit.md) for instructions on setting up a
-  pre-commit hook.
+   [doc/precommit.md](doc/precommit.md) for instructions on setting up a
+   pre-commit hook.
 2. Ensure your commit message is formatted according to
-  [Go conventions](http://golang.org/wiki/CommitMessage).
+   [Go conventions](http://golang.org/wiki/CommitMessage).
 
 ## Questions
 
diff --git a/all.bash b/all.bash
index bef2b31..ebe990d 100755
--- a/all.bash
+++ b/all.bash
@@ -5,6 +5,9 @@
 
 source devtools/lib.sh || { echo "Are you at repo root?"; exit 1; }
 
+# Support ** in globs, for check_script_hashes.
+shopt -s globstar
+
 warnout() {
   while read line; do
     warn "$line"
@@ -112,6 +115,13 @@
     -td=content/static/html/pages | warnout
 }
 
+
+# check_script_hashes checks that our CSP hashes match the ones
+# for our HTML scripts.
+check_script_hashes() {
+  runcmd go run ./devtools/cmd/csphash content/static/html/**/*.tmpl
+}
+
 run_prettier() {
   if ! [ -x "$(command -v prettier)" ]; then
     err "prettier must be installed: see https://prettier.io/docs/en/install.html"
@@ -127,6 +137,7 @@
   check_staticcheck
   check_misspell
   check_unparam
+  check_script_hashes
   run_prettier
 }
 
@@ -145,6 +156,7 @@
   unparam     - (lint) run unparam on source files
   prettier    - (lint, nonstandard) run prettier on .js and .css files.
   templates   - (lint, nonstandard) run go-template-lint on templates
+  script_hashes - (lint) check script hashes
 EOUSAGE
 }
 
@@ -176,6 +188,7 @@
     prettier) run_prettier ;;
     templates) check_templates ;;
     unparam) check_unparam ;;
+    script_hashes) check_script_hashes ;;
     *)
       usage
       exit 1
diff --git a/devtools/cmd/csphash/main.go b/devtools/cmd/csphash/main.go
new file mode 100644
index 0000000..d1fea80
--- /dev/null
+++ b/devtools/cmd/csphash/main.go
@@ -0,0 +1,146 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Command csphash computes the hashes of inline script bodies in files,
+// and checks that they are listed in our content
+// security policy.
+package main
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/base64"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"regexp"
+)
+
+var hashFile = flag.String("hf", "internal/middleware/secureheaders.go", "file with hashes for CSP header")
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(flag.CommandLine.Output(), "usage: %s [flags] FILES\n", os.Args[0])
+		fmt.Fprintf(flag.CommandLine.Output(), "suggestion for FILES: content/static/html/**/*.tmpl\n")
+		flag.PrintDefaults()
+	}
+
+	flag.Parse()
+
+	if flag.NArg() == 0 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	cspHashes, err := extractHashes(*hashFile)
+	if err != nil {
+		log.Fatal(err)
+	}
+	cspHashMap := map[string]bool{}
+	for _, h := range cspHashes {
+		cspHashMap[h] = true
+	}
+
+	ok := true
+	for _, file := range flag.Args() {
+		scripts, err := scripts(file)
+		if err != nil {
+			log.Fatal(err)
+		}
+		for _, s := range scripts {
+			if bytes.Contains(s.tag, []byte("src=")) {
+				fmt.Printf("%s: has script with src attribute: %s\n", file, s.tag)
+				ok = false
+			}
+			hash := cspHash(s.body)
+			if !cspHashMap[hash] {
+				fmt.Printf("missing hash: add the lines below to %s:\n", *hashFile)
+				fmt.Printf("    // From %s\n", file)
+				fmt.Printf(`    "'sha256-%s'",`, hash)
+				fmt.Println()
+				ok = false
+			} else {
+				delete(cspHashMap, hash)
+			}
+		}
+	}
+	for h := range cspHashMap {
+		fmt.Printf("unused hash %s\n", h)
+		ok = false
+	}
+	if !ok {
+		fmt.Printf("Add missing hashes to %s and remove unused ones.\n", *hashFile)
+		os.Exit(1)
+	}
+}
+
+var hashRegexp = regexp.MustCompile(`'sha256-([^']+)'`)
+
+// extractHashes scans the given file for CSP-style hashes and returns them.
+func extractHashes(filename string) ([]string, error) {
+	contents, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return nil, err
+	}
+	var hashes []string
+	matches := hashRegexp.FindAllSubmatch(contents, -1)
+	for _, m := range matches {
+		hashes = append(hashes, string(m[1]))
+	}
+	return hashes, nil
+}
+
+func cspHash(b []byte) string {
+	h := sha256.Sum256(b)
+	return base64.StdEncoding.EncodeToString(h[:])
+}
+
+// script represents an HTML script element.
+type script struct {
+	tag  []byte // `<script attr="a"...>`
+	body []byte // text between open and close script tags
+}
+
+// scripts returns all the script elements in the given file.
+func scripts(filename string) ([]*script, error) {
+	contents, err := ioutil.ReadFile(filename)
+	if err != nil {
+		return nil, fmt.Errorf("%s: %v", filename, err)
+	}
+	return scriptsReader(contents)
+}
+
+var (
+	// Assume none of the attribute values contain a '>'.
+	// Regexp flag `i` means case-insensitive.
+	scriptStartRegexp = regexp.MustCompile(`(?i:<script>|<script\s[^>]*>)`)
+	// Assume all scripts end with a full close tag.
+	scriptEndRegexp = regexp.MustCompile(`(?i:</script>)`)
+)
+
+func scriptsReader(b []byte) ([]*script, error) {
+	var scripts []*script
+	offset := 0
+	for {
+		start := scriptStartRegexp.FindIndex(b)
+		if start == nil {
+			return scripts, nil
+		}
+		tag := b[start[0]:start[1]]
+		b = b[start[1]:]
+		offset += start[1]
+		end := scriptEndRegexp.FindIndex(b)
+		if end == nil {
+			return nil, fmt.Errorf("%s is missing an end tag", tag)
+		}
+		scripts = append(scripts, &script{
+			tag:  tag,
+			body: b[:end[0]],
+		})
+		b = b[end[1]:]
+		offset += end[1]
+	}
+}
diff --git a/devtools/cmd/csphash/main_test.go b/devtools/cmd/csphash/main_test.go
new file mode 100644
index 0000000..e032120
--- /dev/null
+++ b/devtools/cmd/csphash/main_test.go
@@ -0,0 +1,57 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestScriptStartRegexp(t *testing.T) {
+	for _, test := range []struct {
+		in   string
+		want bool
+	}{
+		{`<script>`, true},
+		{`<Script>`, true},
+		{`<script src="/static/min.js">`, true},
+		{`<script
+					integrity="sha256-xyz"
+					src=/foo
+			  >`, true},
+		{`<scriptify>`, false},
+		{`<enscript>`, false},
+	} {
+		got := scriptStartRegexp.MatchString(test.in)
+		if got != test.want {
+			t.Errorf("%s: got %t, want %t", test.in, got, test.want)
+		}
+	}
+}
+
+func TestScriptsReader(t *testing.T) {
+	in := `
+		<script>foo </script>
+		<script src="/static/min.js"></script>
+	`
+	got, err := scriptsReader([]byte(in))
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := []*script{
+		{
+			tag:  []byte(`<script>`),
+			body: []byte(`foo `),
+		},
+		{
+			tag:  []byte(`<script src="/static/min.js">`),
+			body: []byte(""),
+		},
+	}
+	if diff := cmp.Diff(want, got, cmp.AllowUnexported(script{})); diff != "" {
+		t.Errorf("mismatch (-want, +got):\n%s", diff)
+	}
+}
diff --git a/doc/frontend.md b/doc/frontend.md
index de86796..153900a 100644
--- a/doc/frontend.md
+++ b/doc/frontend.md
@@ -35,3 +35,7 @@
 your local database with packages of your choice.
 
 You can then run the frontend with: `go run cmd/frontend/main.go`
+
+If you add, change, or remove any inline scripts in templates, run
+`devtools/cmd/csphash` to check the CSP hashes and see which ones to add
+or remove. Running `all.bash` will do that as well.
diff --git a/internal/middleware/secureheaders.go b/internal/middleware/secureheaders.go
index ee36bc2..7eee866 100644
--- a/internal/middleware/secureheaders.go
+++ b/internal/middleware/secureheaders.go
@@ -21,6 +21,8 @@
 	"'sha256-s16e7aT7Gsajq5UH1DbaEFEnNx2VjvS5Xixcxwm4+F8='",
 	// From content/static/html/pages/pkg_doc.tmpl
 	"'sha256-AvMTqQ+22BA0Nsht+ajju4EQseFQsoG1RxW3Nh6M+wc='",
+	// From content/static/html/worker/index.tmpl
+	"'sha256-5EpitFYSzGNQNUsqi5gAaLqnI3ZWfcRo/6gLTO0oCoE='",
 }
 
 // SecureHeaders adds a content-security-policy and other security-related