internal/natural: add natural sort order
Natural sort order allows comparing strings such that sequences of digits
are compared numerically rather than lexicographically. For example:
"uint8" < "uint16" < "uint32"
Updates golang/go#77160
Change-Id: Iddf6aa33427131a5f0d7f235d61ab7ad6cf6f92c
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/736560
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
kokoro-CI: kokoro <noreply+kokoro@google.com>
diff --git a/internal/natural/compare.go b/internal/natural/compare.go
new file mode 100644
index 0000000..7ee39a9
--- /dev/null
+++ b/internal/natural/compare.go
@@ -0,0 +1,131 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package natural implements "[Natural Sort Order]" for strings. This allows
+// sorting strings in a way that numbers in strings are compared numerically,
+// rather than lexicographically.
+//
+// [Natural Sort Order]: https://en.wikipedia.org/wiki/Natural_sort_order
+package natural
+
+import (
+ "cmp"
+ "strings"
+)
+
+// Compare implements [natural sort order] for strings, where numbers inside
+// strings are compared numerically. For example:
+//
+// "uint8" < "uint16" < "uint32"
+//
+// The implementation conceptually splits the string into components of digits
+// and non-digits. Non-digit sequences are compared lexicographically.
+// Digit sequences are compared numerically. When numeric values are equal, the
+// one with fewer leading zeros is considered smaller. For example:
+//
+// "01" < "001" < "02"
+//
+// The numeric components consist only of sequences of decimal digits [0-9]
+// denoting non-negative integers. For example:
+//
+// "1e6" < "10e5"
+// "0xAB" < "0xB"
+// "-5" < "-10"
+//
+// [natural sort order]: https://en.wikipedia.org/wiki/Natural_sort_order
+func Compare(a, b string) int {
+ for {
+ prefix := nondigitCommonPrefixLength(a, b)
+ a, b = a[prefix:], b[prefix:]
+ if a == "" || b == "" {
+ return cmp.Compare(len(a), len(b))
+ }
+
+ adig := isdigit(a[0])
+ bdig := isdigit(b[0])
+
+ // digit vs non-digit?
+ // The one with the digit is smaller because its non-digit sequence is shorter.
+ if adig != bdig {
+ return -boolToSign(adig)
+ }
+ // Inv: adig == bdig
+
+ // If both are non-digits, compare lexicographically.
+ if !adig {
+ return -boolToSign(a[0] < b[0])
+ }
+ // Inv: adig && bdig
+
+ // Both are numbers, so we compare them numerically.
+ ac, azeros := countDigits(a)
+ bc, bzeros := countDigits(b)
+
+ // If one has more non-zero digits then it's obviously larger.
+ if ac-azeros != bc-bzeros {
+ return cmp.Compare(ac-azeros, bc-bzeros)
+ }
+
+ // Comparing equal-length digit strings will give the
+ // same result as converting them to numbers.
+ r := strings.Compare(a[azeros:ac], b[bzeros:bc])
+ if r != 0 {
+ return r
+ }
+
+ // The one with fewer leading zeros is smaller.
+ if azeros != bzeros && azeros+bzeros > 0 {
+ return cmp.Compare(azeros, bzeros)
+ }
+
+ // They were equal, so continue.
+ a, b = a[ac:], b[bc:]
+ }
+}
+
+// boolToSign converts a boolean to an integer, 1 or -1.
+func boolToSign(b bool) int {
+ if b {
+ return 1
+ } else {
+ return -1
+ }
+}
+
+// Less implements natural string comparison, where numbers are compared numerically.
+func Less(a, b string) bool {
+ return Compare(a, b) < 0
+}
+
+// nondigitCommonPrefixLength returns the length of longest common non-digit
+// byte-oriented prefix of a and b.
+//
+// nondigitCommonPrefixLength("a1", "a1") == 1
+// nondigitCommonPrefixLength("ab", "ac") == 1
+func nondigitCommonPrefixLength(a, b string) (i int) {
+ for i = 0; i < len(a) && i < len(b) && a[i] == b[i] && !isdigit(a[i]); i++ {
+ }
+ return i
+}
+
+// countDigits returns the number of prefix digits and leading zeros in s.
+func countDigits(s string) (count, leadingZeros int) {
+ foundNonZero := false
+ for i, c := range []byte(s) {
+ if !isdigit(c) {
+ return i, leadingZeros
+ }
+ if !foundNonZero && c == '0' {
+ leadingZeros++
+ } else {
+ foundNonZero = true
+ }
+ }
+ return len(s), leadingZeros
+}
+
+// isdigit returns true if c is a digit.
+func isdigit(c byte) bool {
+ return '0' <= c && c <= '9'
+}
diff --git a/internal/natural/compare_test.go b/internal/natural/compare_test.go
new file mode 100644
index 0000000..fa76d38
--- /dev/null
+++ b/internal/natural/compare_test.go
@@ -0,0 +1,111 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package natural_test
+
+import (
+ "fmt"
+ "slices"
+ "testing"
+
+ "golang.org/x/pkgsite/internal/natural"
+)
+
+var tests = []struct {
+ a, b string
+ want int
+}{
+ {"", "", 0},
+ {"a", "", 1},
+ {"abc", "abc", 0},
+ {"ab", "abc", -1},
+ {"x", "ab", 1},
+ {"x", "a", 1},
+ {"b", "x", -1},
+ {"x0", "x#1", -1},
+
+ {"a", "aa", -1},
+ {"a", "a1", -1},
+ {"ab", "a1", 1},
+ {"a", "0", 1},
+ {"A", "0", 1},
+ {"file1.txt", "file2.txt", -1},
+ {"file10.txt", "file2.txt", 1},
+ {"file1000.txt", "file2.txt", 1},
+ {"file0001.txt", "file2.txt", -1},
+ {"file00a.txt", "file000a.txt", -1},
+ {"a10", "a010", -1},
+ {"1e6", "10e5", -1},
+ {"0xAB", "0xB", -1},
+ {"-5", "-10", -1},
+ {"a1b2", "a01b3", -1},
+ {"file_1.txt", "file_10.txt", -1},
+ {"file1.txt", "file1.txt", 0},
+ {"fileA.txt", "fileB.txt", -1},
+ {"file1A.txt", "file1B.txt", -1},
+ {"Uint8x16", "Uint32x8", -1},
+ {"Uint32x16", "Uint32x8", 1},
+ {"Uint10000000000000000000000", "Uint20000000000000000000000", -1},
+ {"Uint10000000000000000000000abc", "Uint10000000000000000000000abc", 0},
+ {"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1", 0},
+ {"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1", 1},
+}
+
+func TestCompare(t *testing.T) {
+ for _, tt := range tests {
+ if got := natural.Compare(tt.a, tt.b); got != tt.want {
+ t.Errorf("Compare(%q, %q) = %d; want %d", tt.a, tt.b, got, tt.want)
+ }
+ if got := natural.Compare(tt.b, tt.a); got != -tt.want {
+ t.Errorf("Compare(%q, %q) = %d; want %d", tt.b, tt.a, got, -tt.want)
+ }
+ }
+}
+
+func TestSliceSort(t *testing.T) {
+ types := []string{"Uint32x16", "Uint16x32", "Uint8x64", "Uint64x8"}
+ want := []string{"Uint8x64", "Uint16x32", "Uint32x16", "Uint64x8"}
+ slices.SortFunc(types, natural.Compare)
+ if !slices.Equal(types, want) {
+ t.Errorf("types = %v; want %v", types, want)
+ }
+}
+
+func BenchmarkCompare(b *testing.B) {
+ var tests = []struct {
+ a, b string
+ }{
+ 0: {"Uint8x16", "Uint32x8"},
+ 1: {"Uint32x16", "Uint32x8"},
+ 2: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint32x8() Uint32x8"},
+ 3: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint8x32() Uint8x32"},
+ 4: {"Uint10000000000000000000000", "Uint20000000000000000000000"},
+ 5: {"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1"},
+ 6: {"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1"},
+ }
+ for i, test := range tests {
+ b.Run(fmt.Sprintf("%d", i), func(b *testing.B) {
+ b.ReportAllocs()
+ for b.Loop() {
+ natural.Compare(test.a, test.b)
+ }
+ })
+ }
+}
+
+func FuzzTransitivity(f *testing.F) {
+ f.Add("", "", "")
+ f.Fuzz(func(t *testing.T, a string, b string, c string) {
+ ab := natural.Compare(a, b)
+ bc := natural.Compare(b, c)
+ ca := natural.Compare(c, a)
+
+ // when the total is 3 or -3, it means that there is a cycle in comparison.
+ if tot := ab + bc + ca; tot == 3 || tot == -3 {
+ t.Errorf("Compare(%q, %q) = %d", a, b, ab)
+ t.Errorf("Compare(%q, %q) = %d", b, c, bc)
+ t.Errorf("Compare(%q, %q) = %d", c, a, ca)
+ }
+ })
+}