diff options
| author | Rob Pike <r@golang.org> | 2010-07-20 00:03:59 -0700 |
|---|---|---|
| committer | Rob Pike <r@golang.org> | 2010-07-20 00:03:59 -0700 |
| commit | 8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe (patch) | |
| tree | aa1654f60f685a83842b0efb7e26008d4b9e1a6e /src/pkg/strings | |
| parent | f6b93ab432dd43d030ecfc7240e8282a7d7e409a (diff) | |
| download | go-8684a089896bffb5ed0818bc39d7ab7c2cf0e2fe.tar.xz | |
strings: add Title
strings.ToTitle converts all characters to title case, which for consistency with the
other To* functions it should continue to do. This CL adds strings.Title, which
does a proper title-casing of the string.
A similar function for package bytes will follow once this is settled.
Fixes #933.
R=rsc
CC=golang-dev
https://golang.org/cl/1869042
Diffstat (limited to 'src/pkg/strings')
| -rw-r--r-- | src/pkg/strings/strings.go | 46 | ||||
| -rw-r--r-- | src/pkg/strings/strings_test.go | 22 |
2 files changed, 68 insertions, 0 deletions
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
index 5d3d61e195..594f03527e 100644
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -333,6 +333,52 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
 	return Map(func(r int) int { return _case.ToTitle(r) }, s)
 }
 
+// isSeparator reports whether the rune could mark a word boundary.
+// TODO: update when package unicode captures more of the properties.
+func isSeparator(rune int) bool {
+	// ASCII alphanumerics and underscore are not separators
+	if rune <= 0x7F {
+		switch {
+		case '0' <= rune && rune <= '9':
+			return false
+		case 'a' <= rune && rune <= 'z':
+			return false
+		case 'A' <= rune && rune <= 'Z':
+			return false
+		case rune == '_':
+			return false
+		}
+		return true
+	}
+	// Letters and digits are not separators
+	if unicode.IsLetter(rune) || unicode.IsDigit(rune) {
+		return false
+	}
+	// Otherwise, all we can do for now is treat spaces as separators.
+	return unicode.IsSpace(rune)
+}
+
+// BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly.
+
+// Title returns a copy of the string s with all Unicode letters that begin words
+// mapped to their title case.
+func Title(s string) string {
+	// Use a closure here to remember state.
+	// Hackish but effective. Depends on Map scanning in order and calling
+	// the closure once per rune.
+	prev := ' '
+	return Map(
+		func(r int) int {
+			if isSeparator(prev) {
+				prev = r
+				return unicode.ToTitle(r)
+			}
+			prev = r
+			return r
+		},
+		s)
+}
+
 // TrimLeftFunc returns a slice of the string s with all leading
 // Unicode code points c satisfying f(c) removed.
 func TrimLeftFunc(s string, f func(r int) bool) string {
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
index 06f1f1de1d..8b0c98113a 100644
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -741,3 +741,25 @@ func TestReplace(t *testing.T) {
 		}
 	}
 }
+
+type TitleTest struct {
+	in, out string
+}
+
+var TitleTests = []TitleTest{
+	TitleTest{"", ""},
+	TitleTest{"a", "A"},
+	TitleTest{" aaa aaa aaa ", " Aaa Aaa Aaa "},
+	TitleTest{" Aaa Aaa Aaa ", " Aaa Aaa Aaa "},
+	TitleTest{"123a456", "123a456"},
+	TitleTest{"double-blind", "Double-Blind"},
+	TitleTest{"ÿøû", "Ÿøû"},
+}
+
+func TestTitle(t *testing.T) {
+	for _, tt := range TitleTests {
+		if s := Title(tt.in); s != tt.out {
+			t.Errorf("Title(%q) = %q, want %q", tt.in, s, tt.out)
+		}
+	}
+}
