internal/version: add package

Package version is added to handle versions when processing modules.

This is copied from x/pkgsite.

Change-Id: I06dba18087779976da922a5a05f91ce5f22fadda
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/463617
Reviewed-by: Julie Qiu <julieqiu@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Julie Qiu <julieqiu@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/go.mod b/go.mod
index 12f9fc2..1856172 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@
 require (
 	cloud.google.com/go/errorreporting v0.3.0
 	github.com/client9/misspell v0.3.4
+	golang.org/x/mod v0.7.0
 	honnef.co/go/tools v0.3.3
 	mvdan.cc/unparam v0.0.0-20230125043941-70a0ce6e7b95
 )
@@ -20,7 +21,6 @@
 	github.com/googleapis/gax-go/v2 v2.7.0 // indirect
 	go.opencensus.io v0.23.0 // indirect
 	golang.org/x/exp/typeparams v0.0.0-20220218215828-6cf2b201936e // indirect
-	golang.org/x/mod v0.7.0 // indirect
 	golang.org/x/net v0.5.0 // indirect
 	golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783 // indirect
 	golang.org/x/sync v0.1.0 // indirect
diff --git a/internal/version/version.go b/internal/version/version.go
new file mode 100644
index 0000000..aa2648a
--- /dev/null
+++ b/internal/version/version.go
@@ -0,0 +1,271 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package version handles version types.
+package version
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	"golang.org/x/mod/semver"
+)
+
+// Type defines the version types a module can have.
+// This must be kept in sync with the 'version_type' database enum.
+type Type string
+
+const (
+	// TypeRelease is a normal release.
+	TypeRelease = Type("release")
+
+	// TypePrerelease is a version with a prerelease.
+	TypePrerelease = Type("prerelease")
+
+	// TypePseudo appears to have a prerelease of the
+	// form <commit date>-<commit hash>.
+	TypePseudo = Type("pseudo")
+)
+
+const (
+	// Latest signifies the latest available version in requests to the
+	// proxy client.
+	Latest = "latest"
+
+	// Main represents the main branch.
+	Main = "main"
+
+	// Master represents the master branch.
+	Master = "master"
+)
+
+func (t Type) String() string {
+	return string(t)
+}
+
+var pseudoVersionRE = regexp.MustCompile(`^v[0-9]+\.(0\.0-|\d+\.\d+-([^+]*\.)?0\.)\d{14}-[A-Za-z0-9]+(\+incompatible)?$`)
+
+// IsPseudo reports whether a valid version v is a pseudo-version.
+// Modified from src/cmd/go/internal/modfetch.
+func IsPseudo(v string) bool {
+	return strings.Count(v, "-") >= 2 && pseudoVersionRE.MatchString(v)
+}
+
+// IsIncompatible reports whether a valid version v is an incompatible version.
+func IsIncompatible(v string) bool {
+	return strings.HasSuffix(v, "+incompatible")
+}
+
+// ParseType returns the Type of a given a version.
+func ParseType(version string) (Type, error) {
+	if !semver.IsValid(version) {
+		return "", fmt.Errorf("ParseType(%q): invalid semver", version)
+	}
+
+	switch {
+	case IsPseudo(version):
+		return TypePseudo, nil
+	case semver.Prerelease(version) != "":
+		return TypePrerelease, nil
+	default:
+		return TypeRelease, nil
+	}
+}
+
+// ForSorting returns a string that encodes version, so that comparing two such
+// strings follows SemVer precedence, https://semver.org clause 11. It assumes
+// version is valid. The returned string ends in '~' if and only if the version
+// does not have a prerelease.
+//
+// For examples, see TestForSorting.
+func ForSorting(version string) string {
+	bytes := make([]byte, 0, len(version))
+	prerelease := false // we are in the prerelease part
+	nondigit := false   // this part has a non-digit character
+	start := 1          // skip 'v'
+	last := len(version)
+
+	// Add the semver component version[start:end] to the result.
+	addPart := func(end int) {
+		if len(bytes) > 0 {
+			// ',' comes before '-' and all letters and digits, so it correctly
+			// imposes lexicographic ordering on the parts of the version.
+			bytes = append(bytes, ',')
+		}
+		if nondigit {
+			// Prepending the largest printable character '~' to a non-numeric
+			// part, along with the fact that encoded numbers never begin with a
+			// '~', (see appendNumericPrefix), ensures the semver requirement
+			// that numeric identifiers always have lower precedence than
+			// non-numeric ones.
+			bytes = append(bytes, '~')
+		} else {
+			bytes = appendNumericPrefix(bytes, end-start)
+		}
+		bytes = append(bytes, version[start:end]...)
+		start = end + 1 // skip over separator character
+		nondigit = false
+	}
+
+loop:
+	for i, c := range version[start:] {
+		p := i + 1
+		switch {
+		case c == '.': // end of a part
+			addPart(p)
+		case c == '-': // first one is start of prerelease
+			if !prerelease {
+				prerelease = true
+				addPart(p)
+			} else {
+				nondigit = true
+			}
+		case c == '+': // start of build; nothing after this matters
+			last = p
+			break loop
+
+		case c < '0' || c > '9':
+			nondigit = true
+		}
+	}
+	if start < last {
+		addPart(last)
+	}
+	if !prerelease {
+		// Make sure prereleases appear first.
+		bytes = append(bytes, '~')
+	}
+	return string(bytes)
+}
+
+// appendNumericPrefix appends a string representing n to dst.
+// n is the length of a digit string; the value we append is a prefix for the
+// digit string s such that
+//
+//	prefix1 + s1 < prefix2 + s2
+//
+// if and only if the integer denoted by s1 is less than the one denoted by s2.
+// In other words, prefix + s is a string that can be compared with other such
+// strings while preserving the ordering of the numbers.
+//
+// If n==1, there is no prefix. (Single-digit numbers are unchanged.)
+// Otherwise, the prefix is a sequence of lower-case letters encoding n.
+// Examples:
+//
+//	n    prefix
+//	1    <none>
+//	2    a
+//	27   z
+//	28   za
+//	53   zz
+//	54   zza
+//
+// This encoding depends on the ASCII properties that:
+// - digits are ordered numerically
+// - letters are ordered alphabetically
+// - digits order before letters (so "1" < "a10")
+func appendNumericPrefix(dst []byte, n int) []byte {
+	n--
+	for i := 0; i < n/26; i++ {
+		dst = append(dst, 'z')
+	}
+	if rem := n % 26; rem > 0 {
+		dst = append(dst, byte('a'+rem-1))
+	}
+	return dst
+}
+
+// Later reports whether v1 is later than v2, using semver but preferring
+// release versions to pre-release versions, and both to pseudo-versions.
+func Later(v1, v2 string) bool {
+	rel1 := semver.Prerelease(v1) == ""
+	rel2 := semver.Prerelease(v2) == ""
+	if rel1 && rel2 {
+		return semver.Compare(v1, v2) > 0
+	}
+	if rel1 != rel2 {
+		return rel1
+	}
+	// Both are pre-release.
+	pseudo1 := IsPseudo(v1)
+	pseudo2 := IsPseudo(v2)
+	if pseudo1 == pseudo2 {
+		return semver.Compare(v1, v2) > 0
+	}
+	return !pseudo1
+}
+
+// LatestVersion finds the latest version of a module using the same algorithm as the
+// Go command. It prefers tagged release versions to tagged pre-release
+// versions, and both of those to pseudo-versions. If versions is empty, LatestVersion
+// returns the empty string.
+//
+// hasGoMod should report whether the version it is given has a go.mod file.
+// LatestVersion returns the latest incompatible version only if the latest compatible
+// version does not have a go.mod file.
+//
+// The meaning of latest is defined at
+// https://golang.org/ref/mod#version-queries. That definition does not deal
+// with retractions, or with a subtlety involving incompatible versions. The
+// actual definition is embodied in the go command's queryMatcher.filterVersions
+// method. This function is a re-implementation and specialization of that
+// method at Go version 1.16
+// (https://go.googlesource.com/go/+/refs/tags/go1.16/src/cmd/go/internal/modload/query.go#441).
+func LatestVersion(versions []string, hasGoMod func(v string) (bool, error)) (v string, err error) {
+	latest := LatestOf(versions)
+	if latest == "" {
+		return "", nil
+	}
+
+	// If the latest is a compatible version, use it.
+	if !IsIncompatible(latest) {
+		return latest, nil
+	}
+	// The latest version is incompatible. If there is a go.mod file at the
+	// latest compatible tagged version, assume the module author has adopted
+	// proper versioning, and use that latest compatible version. Otherwise, use
+	// this incompatible version.
+	latestCompat := LatestOf(RemoveIf(versions, func(v string) bool { return IsIncompatible(v) || IsPseudo(v) }))
+	if latestCompat == "" {
+		// No compatible versions; use the latest (incompatible) version.
+		return latest, nil
+	}
+	latestCompatHasGoMod, err := hasGoMod(latestCompat)
+	if err != nil {
+		return "", err
+	}
+	if latestCompatHasGoMod {
+		return latestCompat, nil
+	}
+	return latest, nil
+}
+
+// LatestOf returns the latest version of a module from a list of versions, using
+// the go command's definition of latest: semver is observed, except that
+// release versions are preferred to prerelease, and both are preferred to pseudo-versions.
+// If versions is empty, the empty string is returned.
+func LatestOf(versions []string) string {
+	if len(versions) == 0 {
+		return ""
+	}
+	latest := versions[0]
+	for _, v := range versions[1:] {
+		if Later(v, latest) {
+			latest = v
+		}
+	}
+	return latest
+}
+
+// RemoveIf returns a copy of s that omits all values for which f returns true.
+func RemoveIf(s []string, f func(string) bool) []string {
+	var r []string
+	for _, x := range s {
+		if !f(x) {
+			r = append(r, x)
+		}
+	}
+	return r
+}
diff --git a/internal/version/version_test.go b/internal/version/version_test.go
new file mode 100644
index 0000000..ddf01e9
--- /dev/null
+++ b/internal/version/version_test.go
@@ -0,0 +1,258 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package version
+
+import (
+	"testing"
+
+	"golang.org/x/mod/semver"
+)
+
+func TestForSorting(t *testing.T) {
+	for _, test := range []struct {
+		in, want string
+	}{
+		{"v1.2.3", "1,2,3~"},
+		{"v12.48.301", "a12,a48,b301~"},
+		{"v0.9.3-alpha.1", "0,9,3,~alpha,1"},
+		{"v1.2.3-rc.20150901.-", "1,2,3,~rc,g20150901,~-"},
+		{"v1.2.3-alpha.789+build", "1,2,3,~alpha,b789"},
+	} {
+		got := ForSorting(test.in)
+		if got != test.want {
+			t.Errorf("ForSorting(%s) = %s, want %s", test.in, got, test.want)
+		}
+	}
+}
+
+func TestForSortingOrder(t *testing.T) {
+	// A list of valid semantic versions, in order.
+	semvers := []string{
+		"v0.0.0-20180713131340-b395d2d6f5ee",
+		"v0.0.0-20190124233150-8f7fa2680c82",
+		"v0.0.0",
+		"v0.0.1",
+		"v0.1.0",
+		"v1.0.0-alpha",
+		"v1.0.0-alpha.1",
+		"v1.0.0-alpha.beta",
+		"v1.0.0-beta",
+		"v1.0.0-beta.2",
+		"v1.0.0-beta.11",
+		"v1.0.0-rc.1",
+		"v1.0.0",
+		"v1.2.0",
+		"v1.11.0",
+		"v1.12.0-alph.1",
+		"v1.12.0-alpha",
+		// These next two would order incorrectly if we used '.' as a separator, because
+		// '.' comes after '-'.
+		"v2.0.0-z.a",
+		"v2.0.0-z-",
+		// These next two would sort incorrectly if we did not prepend non-numeric components
+		// with a '~'.
+		"v2.1.0-a.1",
+		"v2.1.0-a.-",
+	}
+
+	// Check that the test has the ordering right according to the semver package.
+	for i := range semvers {
+		if !semver.IsValid(semvers[i]) {
+			t.Fatalf("test is broken: bad semver: %s", semvers[i])
+		}
+		if i > 0 {
+			if semver.Compare(semvers[i-1], semvers[i]) >= 0 {
+				t.Fatalf("test is broken: %s is not less than %s", semvers[i-1], semvers[i])
+			}
+		}
+	}
+
+	// Check that ForSorting produces the correct ordering.
+	var prev string
+	for _, v := range semvers {
+		got := ForSorting(v)
+		if prev != "" && prev >= got {
+			t.Errorf("%s: %s >= %s, want less than", v, prev, got)
+		}
+		prev = got
+	}
+}
+
+func TestAppendNumericPrefix(t *testing.T) {
+	for _, test := range []struct {
+		n    int
+		want string
+	}{
+		{1, ""},
+		{2, "a"},
+		{3, "b"},
+		{26, "y"},
+		{53, "zz"},    // 53-1 = 26*2
+		{100, "zzzu"}, // 100-1 = 26*3 + 21
+	} {
+		got := string(appendNumericPrefix(nil, test.n))
+		if got != test.want {
+			t.Errorf("%d: got %s, want %s", test.n, got, test.want)
+		}
+	}
+}
+
+func TestParseVersionType(t *testing.T) {
+	testCases := []struct {
+		name, version   string
+		wantVersionType Type
+		wantErr         bool
+	}{
+		{
+			name:            "pseudo major version",
+			version:         "v1.0.0-20190311183353-d8887717615a",
+			wantVersionType: TypePseudo,
+		},
+		{
+			name:            "pseudo prerelease version",
+			version:         "v1.2.3-pre.0.20190311183353-d8887717615a",
+			wantVersionType: TypePseudo,
+		},
+		{
+			name:            "pseudo minor version",
+			version:         "v1.2.4-0.20190311183353-d8887717615a",
+			wantVersionType: TypePseudo,
+		},
+		{
+			name:            "pseudo version invalid",
+			version:         "v1.2.3-20190311183353-d8887717615a",
+			wantVersionType: TypePrerelease,
+		},
+		{
+			name:            "valid release",
+			version:         "v1.0.0",
+			wantVersionType: TypeRelease,
+		},
+		{
+			name:            "valid prerelease",
+			version:         "v1.0.0-alpha.1",
+			wantVersionType: TypePrerelease,
+		},
+		{
+			name:            "invalid version",
+			version:         "not_a_version",
+			wantVersionType: "",
+			wantErr:         true,
+		},
+	}
+
+	for _, test := range testCases {
+		t.Run(test.name, func(t *testing.T) {
+			if gotVt, err := ParseType(test.version); (test.wantErr == (err != nil)) && test.wantVersionType != gotVt {
+				t.Errorf("parseVersionType(%v) = %v, want %v", test.version, gotVt, test.wantVersionType)
+			}
+		})
+	}
+}
+
+func TestLatestOf(t *testing.T) {
+	for _, test := range []struct {
+		name     string
+		versions []string
+		want     string
+	}{
+		{
+			name:     "highest release",
+			versions: []string{"v1.2.3", "v1.0.0", "v1.9.0-pre"},
+			want:     "v1.2.3",
+		},
+		{
+			name:     "highest pre-release if no release",
+			versions: []string{"v1.2.3-alpha", "v1.0.0-beta", "v1.9.0-pre"},
+			want:     "v1.9.0-pre",
+		},
+		{
+			name:     "prefer pre-release to pseudo",
+			versions: []string{"v1.0.0-20180713131340-b395d2d6f5ee", "v0.0.0-alpha"},
+			want:     "v0.0.0-alpha",
+		},
+
+		{
+			name:     "highest pseudo if no pre-release or release",
+			versions: []string{"v0.0.0-20180713131340-b395d2d6f5ee", "v0.0.0-20190124233150-8f7fa2680c82"},
+			want:     "v0.0.0-20190124233150-8f7fa2680c82",
+		},
+		{
+			name:     "use incompatible",
+			versions: []string{"v1.2.3", "v1.0.0", "v2.0.0+incompatible"},
+			want:     "v2.0.0+incompatible",
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+			got := LatestOf(test.versions)
+			if got != test.want {
+				t.Errorf("got %s, want %s", got, test.want)
+			}
+		})
+	}
+}
+
+func TestLatest(t *testing.T) {
+	pseudo := "v0.0.0-20190124233150-8f7fa2680c82"
+	for _, test := range []struct {
+		name     string
+		versions []string
+		hasGoMod func(string) (bool, error)
+		want     string
+	}{
+		{
+			name:     "empty",
+			versions: nil,
+			want:     "",
+		},
+		{
+			name:     "tagged release",
+			versions: []string{pseudo, "v0.1.0", "v1.2.3-pre"},
+			want:     "v0.1.0",
+		},
+		{
+			name:     "tagged pre-release",
+			versions: []string{pseudo, "v1.2.3-pre"},
+			want:     "v1.2.3-pre",
+		},
+		{
+			name:     "pseudo",
+			versions: []string{pseudo},
+			want:     pseudo,
+		},
+		{
+			name:     "incompatible with go.mod",
+			versions: []string{"v2.0.0+incompatible", "v1.2.3"},
+			want:     "v1.2.3",
+		},
+		{
+			name:     "incompatible without go.mod",
+			versions: []string{"v2.0.0+incompatible", "v1.2.3"},
+			hasGoMod: func(v string) (bool, error) { return false, nil },
+			want:     "v2.0.0+incompatible",
+		},
+		{
+			name: "incompatible without tagged go.mod",
+			// Although the latest compatible version has a go.mod file,
+			// it is not a tagged version.
+			versions: []string{pseudo, "v2.0.0+incompatible"},
+			want:     "v2.0.0+incompatible",
+		},
+	} {
+		t.Run(test.name, func(t *testing.T) {
+
+			if test.hasGoMod == nil {
+				test.hasGoMod = func(v string) (bool, error) { return true, nil }
+			}
+			got, err := LatestVersion(test.versions, test.hasGoMod)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if got != test.want {
+				t.Errorf("got %q, want %q", got, test.want)
+			}
+		})
+	}
+}