aboutsummaryrefslogtreecommitdiff
path: root/src/pkg/strings
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2010-07-20 00:03:59 -0700
committerRob Pike <r@golang.org>2010-07-20 00:03:59 -0700
commit8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe (patch)
treeaa1654f60f685a83842b0efb7e26008d4b9e1a6e /src/pkg/strings
parentf6b93ab432dd43d030ecfc7240e8282a7d7e409a (diff)
downloadgo-8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe.tar.xz
strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the other To* functions it should continue to do. This CL adds string.Title, which does a proper title-casing of the string. A similar function for package bytes will follow once this is settled. Fixes #933. R=rsc CC=golang-dev https://golang.org/cl/1869042
Diffstat (limited to 'src/pkg/strings')
-rw-r--r--src/pkg/strings/strings.go46
-rw-r--r--src/pkg/strings/strings_test.go22
2 files changed, 68 insertions, 0 deletions
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
index 5d3d61e195..594f03527e 100644
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -333,6 +333,52 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
return Map(func(r int) int { return _case.ToTitle(r) }, s)
}
+// isSeparator reports whether the rune could mark a word boundary.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(rune int) bool {
+ // ASCII alphanumerics and underscore are not separators
+ if rune <= 0x7F {
+ switch {
+ case '0' <= rune && rune <= '9':
+ return false
+ case 'a' <= rune && rune <= 'z':
+ return false
+ case 'A' <= rune && rune <= 'Z':
+ return false
+ case rune == '_':
+ return false
+ }
+ return true
+ }
+ // Letters and digits are not separators
+ if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
+ return false
+ }
+ // Otherwise, all we can do for now is treat spaces as separators.
+ return unicode.IsSpace(rune)
+}
+
+// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+
+// Title returns a copy of the string s with all Unicode letters that begin words
+// mapped to their title case.
+func Title(s string) string {
+ // Use a closure here to remember state.
+ // Hackish but effective. Depends on Map scanning in order and calling
+ // the closure once per rune.
+ prev := ' '
+ return Map(
+ func(r int) int {
+ if isSeparator(prev) {
+ prev = r
+ return unicode.ToTitle(r)
+ }
+ prev = r
+ return r
+ },
+ s)
+}
+
// TrimLeftFunc returns a slice of the string s with all leading
// Unicode code points c satisfying f(c) removed.
func TrimLeftFunc(s string, f func(r int) bool) string {
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
index 06f1f1de1d..8b0c98113a 100644
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -741,3 +741,25 @@ func TestReplace(t *testing.T) {
}
}
}
+
+type TitleTest struct {
+ in, out string
+}
+
+var TitleTests = []TitleTest{
+ TitleTest{"", ""},
+ TitleTest{"a", "A"},
+ TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "},
+ TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
+ TitleTest{"123a456", "123a456"},
+ TitleTest{"double-blind", "Double-Blind"},
+ TitleTest{"ÿøû", "Ÿøû"},
+}
+
+func TestTitle(t *testing.T) {
+ for _, tt := range TitleTests {
+ if s := Title(tt.in); s != tt.out {
+ t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
+ }
+ }
+}