about summary refs log tree commit diff
path: root/src/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'src/unicode')
-rw-r--r-- src/unicode/letter.go 67
-rw-r--r-- src/unicode/letter_test.go 26
2 files changed, 68 insertions, 25 deletions
diff --git a/src/unicode/letter.go b/src/unicode/letter.go
index 9e2cead631..3959314c97 100644
--- a/src/unicode/letter.go
+++ b/src/unicode/letter.go
@@ -206,34 +206,17 @@ func IsTitle(r rune) bool {
return isExcludingLatin(Title, r)
}
-// to maps the rune using the specified case mapping.
-// It additionally reports whether caseRange contained a mapping for r.
-func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
- if _case < 0 || MaxCase <= _case {
- return ReplacementChar, false // as reasonable an error as any
- }
+// lookupCaseRange returns the CaseRange mapping for rune r or nil if no
+// mapping exists for r.
+func lookupCaseRange(r rune, caseRange []CaseRange) *CaseRange {
// binary search over ranges
lo := 0
hi := len(caseRange)
for lo < hi {
m := int(uint(lo+hi) >> 1)
- cr := caseRange[m]
+ cr := &caseRange[m]
if rune(cr.Lo) <= r && r <= rune(cr.Hi) {
- delta := cr.Delta[_case]
- if delta > MaxRune {
- // In an Upper-Lower sequence, which always starts with
- // an UpperCase letter, the real deltas always look like:
- // {0, 1, 0} UpperCase (Lower is next)
- // {-1, 0, -1} LowerCase (Upper, Title are previous)
- // The characters at even offsets from the beginning of the
- // sequence are upper case; the ones at odd offsets are lower.
- // The correct mapping can be done by clearing or setting the low
- // bit in the sequence offset.
- // The constants UpperCase and TitleCase are even while LowerCase
- // is odd so we take the low bit from _case.
- return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1)), true
- }
- return r + delta, true
+ return cr
}
if r < rune(cr.Lo) {
hi = m
@@ -241,6 +224,37 @@ func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping
lo = m + 1
}
}
+ return nil
+}
+
+// convertCase converts r to _case using CaseRange cr.
+func convertCase(_case int, r rune, cr *CaseRange) rune {
+ delta := cr.Delta[_case]
+ if delta > MaxRune {
+ // In an Upper-Lower sequence, which always starts with
+ // an UpperCase letter, the real deltas always look like:
+ // {0, 1, 0} UpperCase (Lower is next)
+ // {-1, 0, -1} LowerCase (Upper, Title are previous)
+ // The characters at even offsets from the beginning of the
+ // sequence are upper case; the ones at odd offsets are lower.
+ // The correct mapping can be done by clearing or setting the low
+ // bit in the sequence offset.
+ // The constants UpperCase and TitleCase are even while LowerCase
+ // is odd so we take the low bit from _case.
+ return rune(cr.Lo) + ((r-rune(cr.Lo))&^1 | rune(_case&1))
+ }
+ return r + delta
+}
+
+// to maps the rune using the specified case mapping.
+// It additionally reports whether caseRange contained a mapping for r.
+func to(_case int, r rune, caseRange []CaseRange) (mappedRune rune, foundMapping bool) {
+ if _case < 0 || MaxCase <= _case {
+ return ReplacementChar, false // as reasonable an error as any
+ }
+ if cr := lookupCaseRange(r, caseRange); cr != nil {
+ return convertCase(_case, r, cr), true
+ }
return r, false
}
@@ -364,8 +378,11 @@ func SimpleFold(r rune) rune {
// No folding specified. This is a one- or two-element
// equivalence class containing rune and ToLower(rune)
// and ToUpper(rune) if they are different from rune.
- if l := ToLower(r); l != r {
- return l
+ if cr := lookupCaseRange(r, CaseRanges); cr != nil {
+ if l := convertCase(LowerCase, r, cr); l != r {
+ return l
+ }
+ return convertCase(UpperCase, r, cr)
}
- return ToUpper(r)
+ return r
}
diff --git a/src/unicode/letter_test.go b/src/unicode/letter_test.go
index 123f9a642e..75c8aeee90 100644
--- a/src/unicode/letter_test.go
+++ b/src/unicode/letter_test.go
@@ -642,3 +642,29 @@ func TestNegativeRune(t *testing.T) {
}
}
}
+
+func BenchmarkToUpper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _ = ToUpper('δ')
+ }
+}
+
+func BenchmarkToLower(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _ = ToLower('Δ')
+ }
+}
+
+func BenchmarkSimpleFold(b *testing.B) {
+ bench := func(name string, r rune) {
+ b.Run(name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ _ = SimpleFold(r)
+ }
+ })
+ }
+ bench("Upper", 'Δ')
+ bench("Lower", 'δ')
+ bench("Fold", '\u212A')
+ bench("NoFold", '習')
+}