sumdb/internal/sumweb/encode.go - exp - Git at Google

 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // FS-safe encoding of module paths and versions.
 // Copied from cmd/go/internal/module and unexported.

 package sumweb

 import (
 	"fmt"
 	"unicode/utf8"
 )

 // Safe encodings
 //
 // Module paths appear as substrings of file system paths
 // (in the download cache) and of web server URLs in the proxy protocol.
 // In general we cannot rely on file systems to be case-sensitive,
 // nor can we rely on web servers, since they read from file systems.
 // That is, we cannot rely on the file system to keep rsc.io/QUOTE
 // and rsc.io/quote separate. Windows and macOS don't.
 // Instead, we must never require two different casings of a file path.
 // Because we want the download cache to match the proxy protocol,
 // and because we want the proxy protocol to be possible to serve
 // from a tree of static files (which might be stored on a case-insensitive
 // file system), the proxy protocol must never require two different casings
 // of a URL path either.
 //
 // One possibility would be to make the safe encoding be the lowercase
 // hexadecimal encoding of the actual path bytes. This would avoid ever
 // needing different casings of a file path, but it would be fairly illegible
 // to most programmers when those paths appeared in the file system
 // (including in file paths in compiler errors and stack traces),
 // in web server logs, and so on. Instead, we want a safe encoding that
 // leaves most paths unaltered.
 //
 // The safe encoding is this:
 // replace every uppercase letter with an exclamation mark
 // followed by the letter's lowercase equivalent.
 //
 // For example,
 // github.com/Azure/azure-sdk-for-go ->  github.com/!azure/azure-sdk-for-go.
 // github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
 // github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
 //
 // Import paths that avoid upper-case letters are left unchanged.
 // Note that because import paths are ASCII-only and avoid various
 // problematic punctuation (like : < and >), the safe encoding is also ASCII-only
 // and avoids the same problematic punctuation.
 //
 // Import paths have never allowed exclamation marks, so there is no
 // need to define how to encode a literal !.
 //
 // Although paths are disallowed from using Unicode (see pathOK in cmd/go/internal/module),
 // the eventual plan is to allow Unicode letters as well, to assume that
 // file systems and URLs are Unicode-safe (storing UTF-8), and apply
 // the !-for-uppercase convention. Note however that not all runes that
 // are different but case-fold equivalent are an upper/lower pair.
 // For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
 // are considered to case-fold to each other. When we do add Unicode
 // letters, we must not assume that upper/lower are the only case-equivalent pairs.
 // Perhaps the Kelvin symbol would be disallowed entirely, for example.
 // Or perhaps it would encode as "!!k", or perhaps as "(212A)".
 //
 // Also, it would be nice to allow Unicode marks as well as letters,
 // but marks include combining marks, and then we must deal not
 // only with case folding but also normalization: both U+00E9 ('é')
 // and U+0065 U+0301 ('e' followed by combining acute accent)
 // look the same on the page and are treated by some file systems
 // as the same path. If we do allow Unicode marks in paths, there
 // must be some kind of normalization to allow only one canonical
 // encoding of any character used in an import path.

 // encodePath produces the safe encoding of a module path by delegating
 // to encodeString: every ASCII uppercase letter becomes '!' followed by
 // its lowercase form. An error is returned when encodeString rejects the
 // input, which it does for any path containing '!' or a non-ASCII byte.
 func encodePath(path string) (encoding string, err error) {
 	encoding, err = encodeString(path)
 	return encoding, err
 }

 // encodeVersion produces the safe encoding of a module version by
 // delegating to encodeString. No semver validation happens here; the only
 // inputs rejected are those encodeString rejects, namely versions that
 // contain an exclamation mark or a non-ASCII byte.
 func encodeVersion(v string) (encoding string, err error) {
 	encoding, err = encodeString(v)
 	return encoding, err
 }

 // encodeString returns the safe encoding of s: each ASCII uppercase
 // letter is replaced by '!' followed by the corresponding lowercase
 // letter, and every other byte is copied through unchanged. When s has
 // no uppercase letters it is returned as is. An error is returned if s
 // contains '!' or any non-ASCII byte.
 func encodeString(s string) (encoding string, err error) {
 	// Validation pass. Scanning bytes is equivalent to scanning runes
 	// here, because any non-ASCII rune or invalid UTF-8 sequence contains
 	// at least one byte >= utf8.RuneSelf.
 	upper := 0
 	for i := 0; i < len(s); i++ {
 		switch c := s[i]; {
 		case c == '!', c >= utf8.RuneSelf:
 			// Callers are expected to have validated the input already;
 			// the rewriting loop below relies on pure ASCII without '!'.
 			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
 		case 'A' <= c && c <= 'Z':
 			upper++
 		}
 	}

 	// Fast path: nothing needs escaping.
 	if upper == 0 {
 		return s, nil
 	}

 	// Each uppercase letter grows the output by exactly one byte.
 	out := make([]byte, 0, len(s)+upper)
 	for i := 0; i < len(s); i++ {
 		c := s[i]
 		if c < 'A' || 'Z' < c {
 			out = append(out, c)
 			continue
 		}
 		out = append(out, '!', c+('a'-'A'))
 	}
 	return string(out), nil
 }

 // decodePath reverses the safe encoding and returns the module path.
 // It reports an error, quoting the offending input, when decodeString
 // rejects the encoding.
 func decodePath(encoding string) (path string, err error) {
 	if decoded, ok := decodeString(encoding); ok {
 		return decoded, nil
 	}
 	return "", fmt.Errorf("invalid module path encoding %q", encoding)
 }

 // decodeVersion reverses the safe encoding and returns the version string.
 // No semver validation happens here: the result is whatever decodeString
 // yields. It reports an error, quoting the offending input, when
 // decodeString rejects the encoding.
 func decodeVersion(encoding string) (v string, err error) {
 	if decoded, ok := decodeString(encoding); ok {
 		return decoded, nil
 	}
 	return "", fmt.Errorf("invalid version encoding %q", encoding)
 }

 // decodeString reverses the '!'-escaping applied by encodeString.
 // Each "!x" pair, where x is an ASCII lowercase letter, decodes to the
 // uppercase form of x; all other bytes are copied through. The boolean
 // result is false (and the string empty) when the input contains a
 // non-ASCII byte, a bare uppercase letter, a '!' that is not followed by
 // a lowercase letter, or a trailing '!'.
 func decodeString(encoding string) (string, bool) {
 	// The decoded form is never longer than the encoded form.
 	out := make([]byte, 0, len(encoding))

 	// escaped is true when the previous byte was an unconsumed '!'.
 	escaped := false
 	for i := 0; i < len(encoding); i++ {
 		c := encoding[i]
 		switch {
 		case c >= utf8.RuneSelf:
 			// Non-ASCII (or invalid UTF-8) never appears in a safe encoding.
 			return "", false
 		case escaped:
 			if c < 'a' || c > 'z' {
 				return "", false
 			}
 			out = append(out, c-('a'-'A'))
 			escaped = false
 		case c == '!':
 			escaped = true
 		case 'A' <= c && c <= 'Z':
 			// Uppercase letters must always be written in escaped form.
 			return "", false
 		default:
 			out = append(out, c)
 		}
 	}

 	// A dangling '!' at the end has nothing to escape.
 	if escaped {
 		return "", false
 	}
 	return string(out), true
 }
	// Copyright 2018 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// FS-safe encoding of module paths and versions.
	// Copied from cmd/go/internal/module and unexported.

	package sumweb

	import (
	"fmt"
	"unicode/utf8"
	)

	// Safe encodings
	//
	// Module paths appear as substrings of file system paths
	// (in the download cache) and of web server URLs in the proxy protocol.
	// In general we cannot rely on file systems to be case-sensitive,
	// nor can we rely on web servers, since they read from file systems.
	// That is, we cannot rely on the file system to keep rsc.io/QUOTE
	// and rsc.io/quote separate. Windows and macOS don't.
	// Instead, we must never require two different casings of a file path.
	// Because we want the download cache to match the proxy protocol,
	// and because we want the proxy protocol to be possible to serve
	// from a tree of static files (which might be stored on a case-insensitive
	// file system), the proxy protocol must never require two different casings
	// of a URL path either.
	//
	// One possibility would be to make the safe encoding be the lowercase
	// hexadecimal encoding of the actual path bytes. This would avoid ever
	// needing different casings of a file path, but it would be fairly illegible
	// to most programmers when those paths appeared in the file system
	// (including in file paths in compiler errors and stack traces),
	// in web server logs, and so on. Instead, we want a safe encoding that
	// leaves most paths unaltered.
	//
	// The safe encoding is this:
	// replace every uppercase letter with an exclamation mark
	// followed by the letter's lowercase equivalent.
	//
	// For example,
	// github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go.
	// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
	// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
	//
	// Import paths that avoid upper-case letters are left unchanged.
	// Note that because import paths are ASCII-only and avoid various
	// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
	// and avoids the same problematic punctuation.
	//
	// Import paths have never allowed exclamation marks, so there is no
	// need to define how to encode a literal !.
	//
	// Although paths are disallowed from using Unicode (see pathOK in cmd/go/internal/module),
	// the eventual plan is to allow Unicode letters as well, to assume that
	// file systems and URLs are Unicode-safe (storing UTF-8), and apply
	// the !-for-uppercase convention. Note however that not all runes that
	// are different but case-fold equivalent are an upper/lower pair.
	// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
	// are considered to case-fold to each other. When we do add Unicode
	// letters, we must not assume that upper/lower are the only case-equivalent pairs.
	// Perhaps the Kelvin symbol would be disallowed entirely, for example.
	// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
	//
	// Also, it would be nice to allow Unicode marks as well as letters,
	// but marks include combining marks, and then we must deal not
	// only with case folding but also normalization: both U+00E9 ('é')
	// and U+0065 U+0301 ('e' followed by combining acute accent)
	// look the same on the page and are treated by some file systems
	// as the same path. If we do allow Unicode marks in paths, there
	// must be some kind of normalization to allow only one canonical
	// encoding of any character used in an import path.

	// encodePath yields the safe encoding of the module path, as computed
	// by encodeString (uppercase ASCII letters become '!' plus the lowercase
	// letter). It returns encodeString's error when the path contains '!'
	// or a non-ASCII byte.
	func encodePath(path string) (encoding string, err error) {
		encoding, err = encodeString(path)
		return encoding, err
	}

	// encodeVersion yields the safe encoding of the module version, as
	// computed by encodeString. The version is not checked for semver form;
	// it is rejected only when it contains an exclamation mark or a
	// non-ASCII byte.
	func encodeVersion(v string) (encoding string, err error) {
		encoding, err = encodeString(v)
		return encoding, err
	}

	// encodeString returns the safe encoding of s: every ASCII uppercase
	// letter is replaced by '!' followed by its lowercase equivalent, and
	// all other characters are copied unchanged. A string without uppercase
	// letters is returned as is. It returns an error if s contains '!' or
	// any non-ASCII rune (invalid UTF-8 is seen as U+FFFD and rejected too).
	func encodeString(s string) (encoding string, err error) {
		haveUpper := false
		for _, r := range s {
			if r == '!' || r >= utf8.RuneSelf {
				// This should be disallowed by CheckPath, but diagnose anyway.
				// The correctness of the encoding loop below depends on it.
				return "", fmt.Errorf("internal error: inconsistency in EncodePath")
			}
			if 'A' <= r && r <= 'Z' {
				haveUpper = true
			}
		}

		// Nothing to escape: hand back the input without allocating.
		if !haveUpper {
			return s, nil
		}

		// The validation loop guarantees every rune is a single ASCII byte,
		// so the byte(r) conversions below are lossless.
		var buf []byte
		for _, r := range s {
			if 'A' <= r && r <= 'Z' {
				buf = append(buf, '!', byte(r+'a'-'A'))
			} else {
				buf = append(buf, byte(r))
			}
		}
		return string(buf), nil
	}

	// decodePath turns a safe encoding back into the module path it
	// represents. When decodeString rejects the input, the returned error
	// quotes the offending encoding.
	func decodePath(encoding string) (path string, err error) {
		decoded, ok := decodeString(encoding)
		if ok {
			return decoded, nil
		}
		return "", fmt.Errorf("invalid module path encoding %q", encoding)
	}

	// decodeVersion turns a safe encoding back into the version string it
	// represents. The decoded version is not checked for semver form. When
	// decodeString rejects the input, the returned error quotes the
	// offending encoding.
	func decodeVersion(encoding string) (v string, err error) {
		decoded, ok := decodeString(encoding)
		if ok {
			return decoded, nil
		}
		return "", fmt.Errorf("invalid version encoding %q", encoding)
	}

	// decodeString reverses the '!'-escaping performed by encodeString.
	// A '!' followed by an ASCII lowercase letter decodes to that letter in
	// uppercase; every other character is copied unchanged. It returns
	// ("", false) if the input contains a non-ASCII rune, an unescaped
	// uppercase letter, a '!' not followed by a lowercase letter, or a
	// trailing '!'.
	func decodeString(encoding string) (string, bool) {
		var buf []byte

		// bang records that the previous rune was a '!' awaiting its letter.
		bang := false
		for _, r := range encoding {
			if r >= utf8.RuneSelf {
				return "", false
			}
			if bang {
				bang = false
				if r < 'a' || 'z' < r {
					return "", false
				}
				buf = append(buf, byte(r+'A'-'a'))
				continue
			}
			if r == '!' {
				bang = true
				continue
			}
			// A bare uppercase letter can never appear in a safe encoding.
			if 'A' <= r && r <= 'Z' {
				return "", false
			}
			buf = append(buf, byte(r))
		}
		// A '!' at the very end has no letter to escape.
		if bang {
			return "", false
		}
		return string(buf), true
	}