diff options
| author | Egon Elbre <egonelbre@gmail.com> | 2026-01-13 12:29:03 +0200 |
|---|---|---|
| committer | Gopher Robot <gobot@golang.org> | 2026-01-16 13:41:58 -0800 |
| commit | 550788255d99f0e9ee169f12bf65d16e1ede9f7b (patch) | |
| tree | 16b86d111893ffe96a89a0117c31acaaa9f80d1a | |
| parent | 576fb9481e5c52c5d6679fdefa4b07607989d00d (diff) | |
| download | go-x-pkgsite-550788255d99f0e9ee169f12bf65d16e1ede9f7b.tar.xz | |
internal/natural: add natural sort order
Natural sort order allows comparing strings such that sequences of digits
are compared numerically rather than lexicographically. For example:
"uint8" < "uint16" < "uint32"
Updates golang/go#77160
Change-Id: Iddf6aa33427131a5f0d7f235d61ab7ad6cf6f92c
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/736560
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
kokoro-CI: kokoro <noreply+kokoro@google.com>
| -rw-r--r-- | internal/natural/compare.go | 131 | ||||
| -rw-r--r-- | internal/natural/compare_test.go | 111 |
2 files changed, 242 insertions, 0 deletions
diff --git a/internal/natural/compare.go b/internal/natural/compare.go new file mode 100644 index 00000000..7ee39a95 --- /dev/null +++ b/internal/natural/compare.go @@ -0,0 +1,131 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package natural implements "[Natural Sort Order]" for strings. This allows +// sorting strings in a way that numbers in strings are compared numerically, +// rather than lexicographically. +// +// [Natural Sort Order]: https://en.wikipedia.org/wiki/Natural_sort_order +package natural + +import ( + "cmp" + "strings" +) + +// Compare implements [natural sort order] for strings, where numbers inside +// strings are compared numerically. For example: +// +// "uint8" < "uint16" < "uint32" +// +// The implementation conceptually splits the string into components of digits +// and non-digits. Non-digit sequences are compared lexicographically. +// Digit sequences are compared numerically. When numeric values are equal, the +// one with fewer leading zeros is considered smaller. For example: +// +// "01" < "001" < "02" +// +// The numeric components consist only of sequences of decimal digits [0-9] +// denoting non-negative integers. For example: +// +// "1e6" < "10e5" +// "0xAB" < "0xB" +// "-5" < "-10" +// +// [natural sort order]: https://en.wikipedia.org/wiki/Natural_sort_order +func Compare(a, b string) int { + for { + prefix := nondigitCommonPrefixLength(a, b) + a, b = a[prefix:], b[prefix:] + if a == "" || b == "" { + return cmp.Compare(len(a), len(b)) + } + + adig := isdigit(a[0]) + bdig := isdigit(b[0]) + + // digit vs non-digit? + // The one with the digit is smaller because its non-digit sequence is shorter. + if adig != bdig { + return -boolToSign(adig) + } + // Inv: adig == bdig + + // If both are non-digits, compare lexicographically. + if !adig { + return -boolToSign(a[0] < b[0]) + } + // Inv: adig && bdig + + // Both are numbers, so we compare them numerically. + ac, azeros := countDigits(a) + bc, bzeros := countDigits(b) + + // If one has more non-zero digits then it's obviously larger. + if ac-azeros != bc-bzeros { + return cmp.Compare(ac-azeros, bc-bzeros) + } + + // Comparing equal-length digit strings will give the + // same result as converting them to numbers. + r := strings.Compare(a[azeros:ac], b[bzeros:bc]) + if r != 0 { + return r + } + + // The one with fewer leading zeros is smaller. + if azeros != bzeros && azeros+bzeros > 0 { + return cmp.Compare(azeros, bzeros) + } + + // They were equal, so continue. + a, b = a[ac:], b[bc:] + } +} + +// boolToSign converts a boolean to an integer, 1 or -1. +func boolToSign(b bool) int { + if b { + return 1 + } else { + return -1 + } +} + +// Less implements natural string comparison, where numbers are compared numerically. +func Less(a, b string) bool { + return Compare(a, b) < 0 +} + +// nondigitCommonPrefixLength returns the length of longest common non-digit +// byte-oriented prefix of a and b. +// +// nondigitCommonPrefixLength("a1", "a1") == 1 +// nondigitCommonPrefixLength("ab", "ac") == 1 +func nondigitCommonPrefixLength(a, b string) (i int) { + for i = 0; i < len(a) && i < len(b) && a[i] == b[i] && !isdigit(a[i]); i++ { + } + return i +} + +// countDigits returns the number of prefix digits and leading zeros in s. +func countDigits(s string) (count, leadingZeros int) { + foundNonZero := false + for i, c := range []byte(s) { + if !isdigit(c) { + return i, leadingZeros + } + if !foundNonZero && c == '0' { + leadingZeros++ + } else { + foundNonZero = true + } + } + return len(s), leadingZeros +} + +// isdigit returns true if c is a digit. +func isdigit(c byte) bool { + return '0' <= c && c <= '9' +} diff --git a/internal/natural/compare_test.go b/internal/natural/compare_test.go new file mode 100644 index 00000000..fa76d380 --- /dev/null +++ b/internal/natural/compare_test.go @@ -0,0 +1,111 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package natural_test + +import ( + "fmt" + "slices" + "testing" + + "golang.org/x/pkgsite/internal/natural" +) + +var tests = []struct { + a, b string + want int +}{ + {"", "", 0}, + {"a", "", 1}, + {"abc", "abc", 0}, + {"ab", "abc", -1}, + {"x", "ab", 1}, + {"x", "a", 1}, + {"b", "x", -1}, + {"x0", "x#1", -1}, + + {"a", "aa", -1}, + {"a", "a1", -1}, + {"ab", "a1", 1}, + {"a", "0", 1}, + {"A", "0", 1}, + {"file1.txt", "file2.txt", -1}, + {"file10.txt", "file2.txt", 1}, + {"file1000.txt", "file2.txt", 1}, + {"file0001.txt", "file2.txt", -1}, + {"file00a.txt", "file000a.txt", -1}, + {"a10", "a010", -1}, + {"1e6", "10e5", -1}, + {"0xAB", "0xB", -1}, + {"-5", "-10", -1}, + {"a1b2", "a01b3", -1}, + {"file_1.txt", "file_10.txt", -1}, + {"file1.txt", "file1.txt", 0}, + {"fileA.txt", "fileB.txt", -1}, + {"file1A.txt", "file1B.txt", -1}, + {"Uint8x16", "Uint32x8", -1}, + {"Uint32x16", "Uint32x8", 1}, + {"Uint10000000000000000000000", "Uint20000000000000000000000", -1}, + {"Uint10000000000000000000000abc", "Uint10000000000000000000000abc", 0}, + {"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1", 0}, + {"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1", 1}, +} + +func TestCompare(t *testing.T) { + for _, tt := range tests { + if got := natural.Compare(tt.a, tt.b); got != tt.want { + t.Errorf("Compare(%q, %q) = %d; want %d", tt.a, tt.b, got, tt.want) + } + if got := natural.Compare(tt.b, tt.a); got != -tt.want { + t.Errorf("Compare(%q, %q) = %d; want %d", tt.b, tt.a, got, -tt.want) + } + } +} + +func TestSliceSort(t *testing.T) { + types := []string{"Uint32x16", "Uint16x32", "Uint8x64", "Uint64x8"} + want := []string{"Uint8x64", "Uint16x32", "Uint32x16", "Uint64x8"} + slices.SortFunc(types, natural.Compare) + if !slices.Equal(types, want) { + t.Errorf("types = %v; want %v", types, want) + } +} + +func BenchmarkCompare(b *testing.B) { + var tests = []struct { + a, b string + }{ + 0: {"Uint8x16", "Uint32x8"}, + 1: {"Uint32x16", "Uint32x8"}, + 2: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint32x8() Uint32x8"}, + 3: {"func(Uint64x4) AsUint32x8() Uint32x8", "func(Uint64x4) AsUint8x32() Uint8x32"}, + 4: {"Uint10000000000000000000000", "Uint20000000000000000000000"}, + 5: {"a1a1a1a1a1a1a1a1a1a1a1", "a1a1a1a1a1a1a1a1a1a1a1"}, + 6: {"a1a1a1a1a1a1a1a1a1a1a10", "a1a1a1a1a1a1a1a1a1a1a1"}, + } + for i, test := range tests { + b.Run(fmt.Sprintf("%d", i), func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + natural.Compare(test.a, test.b) + } + }) + } +} + +func FuzzTransitivity(f *testing.F) { + f.Add("", "", "") + f.Fuzz(func(t *testing.T, a string, b string, c string) { + ab := natural.Compare(a, b) + bc := natural.Compare(b, c) + ca := natural.Compare(c, a) + + // when the total is 3 or -3, it means that there is a cycle in comparison. + if tot := ab + bc + ca; tot == 3 || tot == -3 { + t.Errorf("Compare(%q, %q) = %d", a, b, ab) + t.Errorf("Compare(%q, %q) = %d", b, c, bc) + t.Errorf("Compare(%q, %q) = %d", c, a, ca) + } + }) +} |
