internal/natural: add natural sort order

Natural sort order allows comparing strings such that sequences of digits
are compared numerically rather than lexicographically. For example:

  "uint8" < "uint16" < "uint32"

Updates golang/go#77160

Change-Id: Iddf6aa33427131a5f0d7f235d61ab7ad6cf6f92c
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/736560
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
kokoro-CI: kokoro <noreply+kokoro@google.com>
diff --git a/internal/natural/compare.go b/internal/natural/compare.go
new file mode 100644
index 0000000..7ee39a9
--- /dev/null
+++ b/internal/natural/compare.go
@@ -0,0 +1,131 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package natural implements "[Natural Sort Order]" for strings. This allows
+// sorting strings in a way that numbers in strings are compared numerically,
+// rather than lexicographically.
+//
+// [Natural Sort Order]: https://en.wikipedia.org/wiki/Natural_sort_order
+package natural
+
+import (
+	"cmp"
+	"strings"
+)
+
+// Compare implements [natural sort order] for strings, where numbers inside
+// strings are compared numerically. For example:
+//
+//	"uint8" < "uint16" < "uint32"
+//
+// The implementation conceptually splits the string into components of digits
+// and non-digits. Non-digit sequences are compared lexicographically.
+// Digit sequences are compared numerically. When numeric values are equal, the
+// one with fewer leading zeros is considered smaller. For example:
+//
+//	"01" < "001" < "02"
+//
+// The numeric components consist only of sequences of decimal digits [0-9]
+// denoting non-negative integers. For example:
+//
+//	"1e6"  < "10e5"
+//	"0xAB" < "0xB"
+//	"-5"   < "-10"
+//
+// [natural sort order]: https://en.wikipedia.org/wiki/Natural_sort_order
+func Compare(a, b string) int {
+	for {
+		prefix := nondigitCommonPrefixLength(a, b)
+		a, b = a[prefix:], b[prefix:]
+		if a == "" || b == "" {
+			return cmp.Compare(len(a), len(b))
+		}
+
+		adig := isdigit(a[0])
+		bdig := isdigit(b[0])
+
+		// digit vs non-digit?
+		// The one with the digit is smaller because its non-digit sequence is shorter.
+		if adig != bdig {
+			return -boolToSign(adig)
+		}
+		// Inv: adig == bdig
+
+		// If both are non-digits, compare lexicographically.
+		if !adig {
+			return -boolToSign(a[0] < b[0])
+		}
+		// Inv: adig && bdig
+
+		// Both are numbers, so we compare them numerically.
+		ac, azeros := countDigits(a)
+		bc, bzeros := countDigits(b)
+
+		// If one has more non-zero digits then it's obviously larger.
+		if ac-azeros != bc-bzeros {
+			return cmp.Compare(ac-azeros, bc-bzeros)
+		}
+
+		// Comparing equal-length digit strings will give the
+		// same result as converting them to numbers.
+		r := strings.Compare(a[azeros:ac], b[bzeros:bc])
+		if r != 0 {
+			return r
+		}
+
+		// The one with fewer leading zeros is smaller.
+		if azeros != bzeros && azeros+bzeros > 0 {
+			return cmp.Compare(azeros, bzeros)
+		}
+
+		// They were equal, so continue.
+		a, b = a[ac:], b[bc:]
+	}
+}
+
+// boolToSign converts a boolean to an integer, 1 or -1.
+func boolToSign(b bool) int {
+	if b {
+		return 1
+	} else {
+		return -1
+	}
+}
+
+// Less implements natural string comparison, where numbers are compared numerically.
+func Less(a, b string) bool {
+	return Compare(a, b) < 0
+}
+
+// nondigitCommonPrefixLength returns the length of longest common non-digit
+// byte-oriented prefix of a and b.
+//
+//	nondigitCommonPrefixLength("a1", "a1") == 1
+//	nondigitCommonPrefixLength("ab", "ac") == 1
+func nondigitCommonPrefixLength(a, b string) (i int) {
+	for i = 0; i < len(a) && i < len(b) && a[i] == b[i] && !isdigit(a[i]); i++ {
+	}
+	return i
+}
+
+// countDigits returns the number of prefix digits and leading zeros in s.
+func countDigits(s string) (count, leadingZeros int) {
+	foundNonZero := false
+	for i, c := range []byte(s) {
+		if !isdigit(c) {
+			return i, leadingZeros
+		}
+		if !foundNonZero && c == '0' {
+			leadingZeros++
+		} else {
+			foundNonZero = true
+		}
+	}
+	return len(s), leadingZeros
+}
+
+// isdigit returns true if c is a digit.
+func isdigit(c byte) bool {
+	return '0' <= c && c <= '9'
+}
diff --git a/internal/natural/compare_test.go b/internal/natural/compare_test.go
new file mode 100644
index 0000000..fa76d38
--- /dev/null
+++ b/internal/natural/compare_test.go
@@ -0,0 +1,111 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package natural_test
+
+import (
+	"fmt"
+	"slices"
+	"testing"
+
+	"golang.org/x/pkgsite/internal/natural"
+)
+
+var tests = []struct {
+	a, b string
+	want int
+}{
+	{"", "", 0},
+	{"a", "", 1},
+	{"abc", "abc", 0},
+	{"ab", "abc", -1},
+	{"x", "ab", 1},
+	{"x", "a", 1},
+	{"b", "x", -1},
+	{"x0", "x#1", -1},
+
+	{"a", "aa", -1},
+	{"a", "a1", -1},
+	{"ab", "a1", 1},
+	{"a", "0", 1},
+	{"A", "0", 1},
+	{"file1.txt", "file2.txt", -1},
+	{"file10.txt", "file2.txt", 1},
+	{"file1000.txt", "file2.txt", 1},
+	{"file0001.txt", "file2.txt", -1},
+	{"file00a.txt", "file000a.txt", -1},
+	{"a10", "a010", -1},
+	{"1e6", "10e5", -1},
+	{"0xAB", "0xB", -1},
+	{"-5", "-10", -1},
+	{"a1b2", "a01b3", -1},
+	{"file_1.txt", "file_10.txt", -1},
+	{"file1.txt", "file1.txt", 0},
+	{"fileA.txt", "fileB.txt", -1},
+	{"file1A.txt", "file1B.txt", -1},
+	{"Uint8x16", "Uint32x8", -1},
+	{"Uint32x16", "Uint32x8", 1},
+	{"Uint10000000000000000000000", "Uint20000000000000000000000", -1},
+	{"Uint10000000000000000000000abc", "Uint10000000000000000000000abc", 0},
+	{"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1", 0},
+	{"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1", 1},
+}
+
+func TestCompare(t *testing.T) {
+	for _, tt := range tests {
+		if got := natural.Compare(tt.a, tt.b); got != tt.want {
+			t.Errorf("Compare(%q, %q) = %d; want %d", tt.a, tt.b, got, tt.want)
+		}
+		if got := natural.Compare(tt.b, tt.a); got != -tt.want {
+			t.Errorf("Compare(%q, %q) = %d; want %d", tt.b, tt.a, got, -tt.want)
+		}
+	}
+}
+
+func TestSliceSort(t *testing.T) {
+	types := []string{"Uint32x16", "Uint16x32", "Uint8x64", "Uint64x8"}
+	want := []string{"Uint8x64", "Uint16x32", "Uint32x16", "Uint64x8"}
+	slices.SortFunc(types, natural.Compare)
+	if !slices.Equal(types, want) {
+		t.Errorf("types = %v; want %v", types, want)
+	}
+}
+
+func BenchmarkCompare(b *testing.B) {
+	var tests = []struct {
+		a, b string
+	}{
+		0: {"Uint8x16", "Uint32x8"},
+		1: {"Uint32x16", "Uint32x8"},
+		2: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint32x8() Uint32x8"},
+		3: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint8x32() Uint8x32"},
+		4: {"Uint10000000000000000000000", "Uint20000000000000000000000"},
+		5: {"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1"},
+		6: {"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1"},
+	}
+	for i, test := range tests {
+		b.Run(fmt.Sprintf("%d", i), func(b *testing.B) {
+			b.ReportAllocs()
+			for b.Loop() {
+				natural.Compare(test.a, test.b)
+			}
+		})
+	}
+}
+
+func FuzzTransitivity(f *testing.F) {
+	f.Add("", "", "")
+	f.Fuzz(func(t *testing.T, a string, b string, c string) {
+		ab := natural.Compare(a, b)
+		bc := natural.Compare(b, c)
+		ca := natural.Compare(c, a)
+
+		// when the total is 3 or -3, it means that there is a cycle in comparison.
+		if tot := ab + bc + ca; tot == 3 || tot == -3 {
+			t.Errorf("Compare(%q, %q) = %d", a, b, ab)
+			t.Errorf("Compare(%q, %q) = %d", b, c, bc)
+			t.Errorf("Compare(%q, %q) = %d", c, a, ca)
+		}
+	})
+}