| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // FS-safe encoding of module paths and versions. |
| // Copied from cmd/go/internal/module and unexported. |
| |
| package sumweb |
| |
| import ( |
| "fmt" |
| "unicode/utf8" |
| ) |
| |
| // Safe encodings |
| // |
| // Module paths appear as substrings of file system paths |
| // (in the download cache) and of web server URLs in the proxy protocol. |
| // In general we cannot rely on file systems to be case-sensitive, |
| // nor can we rely on web servers, since they read from file systems. |
| // That is, we cannot rely on the file system to keep rsc.io/QUOTE |
| // and rsc.io/quote separate. Windows and macOS don't. |
| // Instead, we must never require two different casings of a file path. |
| // Because we want the download cache to match the proxy protocol, |
| // and because we want the proxy protocol to be possible to serve |
| // from a tree of static files (which might be stored on a case-insensitive |
| // file system), the proxy protocol must never require two different casings |
| // of a URL path either. |
| // |
| // One possibility would be to make the safe encoding be the lowercase |
| // hexadecimal encoding of the actual path bytes. This would avoid ever |
| // needing different casings of a file path, but it would be fairly illegible |
| // to most programmers when those paths appeared in the file system |
| // (including in file paths in compiler errors and stack traces), |
| // in web server logs, and so on. Instead, we want a safe encoding that |
| // leaves most paths unaltered. |
| // |
| // The safe encoding is this: |
| // replace every uppercase letter with an exclamation mark |
| // followed by the letter's lowercase equivalent. |
| // |
| // For example, |
| // github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go. |
| // github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy |
| // github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus. |
| // |
| // Import paths that avoid upper-case letters are left unchanged. |
| // Note that because import paths are ASCII-only and avoid various |
| // problematic punctuation (like : < and >), the safe encoding is also ASCII-only |
| // and avoids the same problematic punctuation. |
| // |
| // Import paths have never allowed exclamation marks, so there is no |
| // need to define how to encode a literal !. |
| // |
| // Although paths are disallowed from using Unicode (see pathOK in |
| // cmd/go/internal/module, from which this file was copied), |
| // the eventual plan is to allow Unicode letters as well, to assume that |
| // file systems and URLs are Unicode-safe (storing UTF-8), and apply |
| // the !-for-uppercase convention. Note however that not all runes that |
| // are different but case-fold equivalent are an upper/lower pair. |
| // For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin) |
| // are considered to case-fold to each other. When we do add Unicode |
| // letters, we must not assume that upper/lower are the only case-equivalent pairs. |
| // Perhaps the Kelvin symbol would be disallowed entirely, for example. |
| // Or perhaps it would encode as "!!k", or perhaps as "(212A)". |
| // |
| // Also, it would be nice to allow Unicode marks as well as letters, |
| // but marks include combining marks, and then we must deal not |
| // only with case folding but also normalization: both U+00E9 ('é') |
| // and U+0065 U+0301 ('e' followed by combining acute accent) |
| // look the same on the page and are treated by some file systems |
| // as the same path. If we do allow Unicode marks in paths, there |
| // must be some kind of normalization to allow only one canonical |
| // encoding of any character used in an import path. |
| |
| // encodePath returns the safe encoding of the given module path. |
| // It fails if the module path is invalid. |
| func encodePath(path string) (encoding string, err error) { |
| return encodeString(path) |
| } |
| |
| // encodeVersion returns the safe encoding of the given module version. |
| // Versions are allowed to be in non-semver form but must be valid file names |
| // and not contain exclamation marks. |
| func encodeVersion(v string) (encoding string, err error) { |
| return encodeString(v) |
| } |
| |
// encodeString applies the !-for-uppercase safe encoding to s: every ASCII
// uppercase letter is replaced by '!' followed by its lowercase form, and
// every other byte is copied through unchanged.
//
// It returns an error if s contains '!' or any non-ASCII byte. If s has no
// uppercase letters, s itself is returned.
func encodeString(s string) (encoding string, err error) {
	// First pass: validate the input and count the letters needing escape.
	uppers := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '!' || c >= utf8.RuneSelf:
			// Such input should already have been rejected upstream
			// (by CheckPath in the original module code). Report it
			// anyway: the rewrite loop below relies on s being
			// '!'-free ASCII.
			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
		case 'A' <= c && c <= 'Z':
			uppers++
		}
	}

	if uppers == 0 {
		return s, nil
	}

	// Second pass: each uppercase letter grows the output by one byte.
	out := make([]byte, 0, len(s)+uppers)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < 'A' || 'Z' < c {
			out = append(out, c)
			continue
		}
		out = append(out, '!', c-'A'+'a')
	}
	return string(out), nil
}
| |
| // decodePath returns the module path of the given safe encoding. |
| // It fails if the encoding is invalid or encodes an invalid path. |
| func decodePath(encoding string) (path string, err error) { |
| path, ok := decodeString(encoding) |
| if !ok { |
| return "", fmt.Errorf("invalid module path encoding %q", encoding) |
| } |
| return path, nil |
| } |
| |
| // decodeVersion returns the version string for the given safe encoding. |
| // It fails if the encoding is invalid or encodes an invalid version. |
| // Versions are allowed to be in non-semver form but must be valid file names |
| // and not contain exclamation marks. |
| func decodeVersion(encoding string) (v string, err error) { |
| v, ok := decodeString(encoding) |
| if !ok { |
| return "", fmt.Errorf("invalid version encoding %q", encoding) |
| } |
| return v, nil |
| } |
| |
// decodeString reverses the !-for-uppercase safe encoding: each '!' followed
// by a lowercase ASCII letter becomes the corresponding uppercase letter, and
// every other byte is copied through unchanged.
//
// The boolean result is false, and the string empty, when the encoding is
// malformed: it contains a non-ASCII byte, a literal uppercase letter, or a
// '!' that is not followed by a lowercase ASCII letter (including a trailing
// '!').
func decodeString(encoding string) (string, bool) {
	var out []byte

	for i := 0; i < len(encoding); i++ {
		c := encoding[i]
		switch {
		case c >= utf8.RuneSelf:
			return "", false
		case c == '!':
			// Consume the escaped letter together with the '!'.
			i++
			if i == len(encoding) {
				return "", false
			}
			lower := encoding[i]
			if lower < 'a' || 'z' < lower {
				return "", false
			}
			out = append(out, lower-'a'+'A')
		case 'A' <= c && c <= 'Z':
			// A valid encoding never contains a bare uppercase letter.
			return "", false
		default:
			out = append(out, c)
		}
	}
	return string(out), true
}