notary/internal/sumweb: copy path, version encoding from cmd/go

The module paths and versions will be encoded the same
as in the proxy protocol.

Change-Id: I9f82fb9d6d58f248449eedb6b03469718435f47d
Reviewed-on: https://go-review.googlesource.com/c/exp/+/172963
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
diff --git a/notary/internal/sumweb/cache.go b/notary/internal/sumweb/cache.go
new file mode 100644
index 0000000..a8117a7
--- /dev/null
+++ b/notary/internal/sumweb/cache.go
@@ -0,0 +1,59 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Parallel cache.
+// This file is copied from cmd/go/internal/par.
+
+package sumweb
+
+import (
+	"sync"
+	"sync/atomic"
+)
+
+// parCache runs an action once per key and caches the result.
+type parCache struct {
+	m sync.Map // key -> *cacheEntry
+}
+
+type cacheEntry struct {
+	done   uint32      // set to 1 (atomically) by Do after result has been stored
+	mu     sync.Mutex  // held by Do while it checks done and runs f
+	result interface{} // value returned by f; Get reads it only once done != 0
+}
+
+// Do calls the function f if and only if Do is being called for the first time with this key.
+// No call to Do with a given key returns until the one call to f returns.
+// Do returns the value returned by the one call to f.
+func (c *parCache) Do(key interface{}, f func() interface{}) interface{} {
+	entryIface, ok := c.m.Load(key)
+	if !ok {
+		entryIface, _ = c.m.LoadOrStore(key, new(cacheEntry)) // if another caller stored first, use its entry
+	}
+	e := entryIface.(*cacheEntry)
+	if atomic.LoadUint32(&e.done) == 0 {
+		e.mu.Lock()
+		if atomic.LoadUint32(&e.done) == 0 { // re-check: f may have finished while we waited for mu
+			e.result = f()
+			atomic.StoreUint32(&e.done, 1) // set done only after result has been assigned
+		}
+		e.mu.Unlock() // not deferred. NOTE(review): a panic in f would leave mu locked for this key
+	}
+	return e.result
+}
+
+// Get returns the cached result associated with key.
+// It returns nil if there is no such result.
+// If the result for key is being computed, Get does not wait for the computation to finish.
+func (c *parCache) Get(key interface{}) interface{} {
+	entryIface, ok := c.m.Load(key)
+	if !ok {
+		return nil
+	}
+	e := entryIface.(*cacheEntry)
+	if atomic.LoadUint32(&e.done) == 0 { // entry exists but its result has not been published yet
+		return nil
+	}
+	return e.result
+}
diff --git a/notary/internal/sumweb/encode.go b/notary/internal/sumweb/encode.go
new file mode 100644
index 0000000..d044a84
--- /dev/null
+++ b/notary/internal/sumweb/encode.go
@@ -0,0 +1,167 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// FS-safe encoding of module paths and versions.
+// Copied from cmd/go/internal/module and unexported.
+
+package sumweb
+
+import (
+	"fmt"
+	"unicode/utf8"
+)
+
+// Safe encodings
+//
+// Module paths appear as substrings of file system paths
+// (in the download cache) and of web server URLs in the proxy protocol.
+// In general we cannot rely on file systems to be case-sensitive,
+// nor can we rely on web servers, since they read from file systems.
+// That is, we cannot rely on the file system to keep rsc.io/QUOTE
+// and rsc.io/quote separate. Windows and macOS don't.
+// Instead, we must never require two different casings of a file path.
+// Because we want the download cache to match the proxy protocol,
+// and because we want the proxy protocol to be possible to serve
+// from a tree of static files (which might be stored on a case-insensitive
+// file system), the proxy protocol must never require two different casings
+// of a URL path either.
+//
+// One possibility would be to make the safe encoding be the lowercase
+// hexadecimal encoding of the actual path bytes. This would avoid ever
+// needing different casings of a file path, but it would be fairly illegible
+// to most programmers when those paths appeared in the file system
+// (including in file paths in compiler errors and stack traces),
+// in web server logs, and so on. Instead, we want a safe encoding that
+// leaves most paths unaltered.
+//
+// The safe encoding is this:
+// replace every uppercase letter with an exclamation mark
+// followed by the letter's lowercase equivalent.
+//
+// For example,
+// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go
+// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
+// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
+//
+// Import paths that avoid upper-case letters are left unchanged.
+// Note that because import paths are ASCII-only and avoid various
+// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
+// and avoids the same problematic punctuation.
+//
+// Import paths have never allowed exclamation marks, so there is no
+// need to define how to encode a literal !.
+//
+// Although paths are disallowed from using Unicode (see pathOK in cmd/go/internal/module),
+// the eventual plan is to allow Unicode letters as well, to assume that
+// file systems and URLs are Unicode-safe (storing UTF-8), and apply
+// the !-for-uppercase convention. Note however that not all runes that
+// are different but case-fold equivalent are an upper/lower pair.
+// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
+// are considered to case-fold to each other. When we do add Unicode
+// letters, we must not assume that upper/lower are the only case-equivalent pairs.
+// Perhaps the Kelvin symbol would be disallowed entirely, for example.
+// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
+//
+// Also, it would be nice to allow Unicode marks as well as letters,
+// but marks include combining marks, and then we must deal not
+// only with case folding but also normalization: both U+00E9 ('é')
+// and U+0065 U+0301 ('e' followed by combining acute accent)
+// look the same on the page and are treated by some file systems
+// as the same path. If we do allow Unicode marks in paths, there
+// must be some kind of normalization to allow only one canonical
+// encoding of any character used in an import path.
+
+// encodePath returns the safe encoding of the given module path.
+// It fails if the path contains an exclamation mark or a non-ASCII character.
+func encodePath(path string) (encoding string, err error) {
+	return encodeString(path)
+}
+
+// encodeVersion returns the safe encoding of the given module version.
+// Versions may be in non-semver form but must be valid file names; this function
+// itself rejects only exclamation marks and non-ASCII characters.
+func encodeVersion(v string) (encoding string, err error) {
+	return encodeString(v)
+}
+
+func encodeString(s string) (encoding string, err error) {
+	haveUpper := false
+	for _, r := range s {
+		if r == '!' || r >= utf8.RuneSelf {
+			// CheckPath (presumably the one in cmd/go/internal/module, the source of this copy) should disallow these; diagnose anyway.
+			// The loop below emits one byte per rune and uses '!' as its escape marker, so it depends on this check.
+			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
+		}
+		if 'A' <= r && r <= 'Z' {
+			haveUpper = true
+		}
+	}
+
+	if !haveUpper { // nothing to escape: return s itself
+		return s, nil
+	}
+
+	var buf []byte
+	for _, r := range s {
+		if 'A' <= r && r <= 'Z' {
+			buf = append(buf, '!', byte(r+'a'-'A')) // e.g. 'Q' becomes "!q"
+		} else {
+			buf = append(buf, byte(r))
+		}
+	}
+	return string(buf), nil
+}
+
+// decodePath returns the module path of the given safe encoding.
+// It fails if the encoding is invalid; the decoded path is not validated further.
+func decodePath(encoding string) (path string, err error) {
+	path, ok := decodeString(encoding)
+	if !ok {
+		return "", fmt.Errorf("invalid module path encoding %q", encoding)
+	}
+	return path, nil
+}
+
+// decodeVersion returns the version string for the given safe encoding.
+// It fails if the encoding is invalid; the decoded version is not validated further.
+// Versions are allowed to be in non-semver form but must be valid file names
+// and not contain exclamation marks.
+func decodeVersion(encoding string) (v string, err error) {
+	v, ok := decodeString(encoding)
+	if !ok {
+		return "", fmt.Errorf("invalid version encoding %q", encoding)
+	}
+	return v, nil
+}
+
+func decodeString(encoding string) (string, bool) {
+	var buf []byte
+
+	bang := false // true when the previous rune was a '!' still awaiting its letter
+	for _, r := range encoding {
+		if r >= utf8.RuneSelf {
+			return "", false
+		}
+		if bang {
+			bang = false
+			if r < 'a' || 'z' < r { // '!' must be followed by a lowercase ASCII letter
+				return "", false
+			}
+			buf = append(buf, byte(r+'A'-'a'))
+			continue
+		}
+		if r == '!' {
+			bang = true
+			continue
+		}
+		if 'A' <= r && r <= 'Z' { // a raw uppercase letter is rejected; uppercase is only produced via '!'
+			return "", false
+		}
+		buf = append(buf, byte(r))
+	}
+	if bang { // input ended with a '!' that has no letter after it
+		return "", false
+	}
+	return string(buf), true
+}
diff --git a/notary/internal/sumweb/encode_test.go b/notary/internal/sumweb/encode_test.go
new file mode 100644
index 0000000..9ed5e4a
--- /dev/null
+++ b/notary/internal/sumweb/encode_test.go
@@ -0,0 +1,67 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sumweb
+
+import "testing"
+
+var encodeTests = []struct {
+	path string // unencoded module path
+	enc  string // empty means same as path
+}{
+	{path: "ascii.com/abcdefghijklmnopqrstuvwxyz.-+/~_0123456789"},
+	{path: "github.com/GoogleCloudPlatform/omega", enc: "github.com/!google!cloud!platform/omega"},
+}
+
+func TestEncodePath(t *testing.T) {
+	// Check that each path in encodeTests encodes to the expected string.
+	for _, tt := range encodeTests {
+		enc, err := encodePath(tt.path)
+		if err != nil {
+			t.Errorf("encodePath(%q): unexpected error: %v", tt.path, err)
+			continue
+		}
+		want := tt.enc
+		if want == "" {
+			want = tt.path
+		}
+		if enc != want {
+			t.Errorf("encodePath(%q) = %q, want %q", tt.path, enc, want)
+		}
+	}
+}
+
+var badDecode = []string{
+	"github.com/GoogleCloudPlatform/omega",     // raw uppercase letters
+	"github.com/!google!cloud!platform!/omega", // '!' followed by '/'
+	"github.com/!0google!cloud!platform/omega", // '!' followed by a digit
+	"github.com/!_google!cloud!platform/omega", // '!' followed by '_'
+	"github.com/!!google!cloud!platform/omega", // '!' followed by '!'
+}
+
+func TestDecodePath(t *testing.T) {
+	// Check that every malformed encoding in badDecode is rejected.
+	for _, bad := range badDecode {
+		_, err := decodePath(bad)
+		if err == nil {
+			t.Errorf("decodePath(%q): succeeded, want error (invalid decoding)", bad)
+		}
+	}
+
+	// Check that each valid encoding decodes back to its original path.
+	for _, tt := range encodeTests {
+		enc := tt.enc
+		if enc == "" {
+			enc = tt.path
+		}
+		path, err := decodePath(enc)
+		if err != nil {
+			t.Errorf("decodePath(%q): unexpected error: %v", enc, err)
+			continue
+		}
+		if path != tt.path {
+			t.Errorf("decodePath(%q) = %q, want %q", enc, path, tt.path)
+		}
+	}
+}
diff --git a/notary/internal/sumweb/server.go b/notary/internal/sumweb/server.go
index d1338b3..2474b76 100644
--- a/notary/internal/sumweb/server.go
+++ b/notary/internal/sumweb/server.go
@@ -76,6 +76,7 @@
 			http.Error(w, "invalid module@version syntax", http.StatusBadRequest)
 			return
 		}
+		// TODO(rsc): Decide whether to !-decode here.
 		id, err := h.Server.FindKey(ctx, mod)
 		if err != nil {
 			reportError(w, r, err)