aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/strconv/ftoa.go4
-rw-r--r--src/strconv/ftoaryu.go31
-rw-r--r--src/strconv/itoa.go285
3 files changed, 204 insertions, 116 deletions
diff --git a/src/strconv/ftoa.go b/src/strconv/ftoa.go
index acbcb9fcc9..1d4bf770be 100644
--- a/src/strconv/ftoa.go
+++ b/src/strconv/ftoa.go
@@ -483,7 +483,7 @@ func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
}
// mantissa
- dst, _ = formatBits(dst, mant, 10, false, true)
+ dst = AppendUint(dst, mant, 10)
// p
dst = append(dst, 'p')
@@ -493,7 +493,7 @@ func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte {
if exp >= 0 {
dst = append(dst, '+')
}
- dst, _ = formatBits(dst, uint64(exp), 10, exp < 0, true)
+ dst = AppendInt(dst, int64(exp), 10)
return dst
}
diff --git a/src/strconv/ftoaryu.go b/src/strconv/ftoaryu.go
index c1aaa250f3..9349df955f 100644
--- a/src/strconv/ftoaryu.go
+++ b/src/strconv/ftoaryu.go
@@ -192,30 +192,9 @@ func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int
m /= 10
trimmed++
}
- // render digits (similar to formatBits)
- n := uint(prec)
+ // render digits
+ formatBase10(d.d[:prec], m)
d.nd = prec
- v := m
- for v >= 100 {
- var v1, v2 uint64
- if v>>32 == 0 {
- v1, v2 = uint64(uint32(v)/100), uint64(uint32(v)%100)
- } else {
- v1, v2 = v/100, v%100
- }
- n -= 2
- d.d[n+1] = smallsString[2*v2+1]
- d.d[n+0] = smallsString[2*v2+0]
- v = v1
- }
- if v > 0 {
- n--
- d.d[n] = smallsString[2*v+1]
- }
- if v >= 10 {
- n--
- d.d[n] = smallsString[2*v]
- }
for d.d[d.nd-1] == '0' {
d.nd--
trimmed++
@@ -448,8 +427,8 @@ func ryuDigits32(d *decimalSlice, lower, central, upper uint32,
n := endindex
for n > d.nd {
v1, v2 := v/100, v%100
- d.d[n] = smallsString[2*v2+1]
- d.d[n-1] = smallsString[2*v2+0]
+ d.d[n] = smalls[2*v2+1]
+ d.d[n-1] = smalls[2*v2+0]
n -= 2
v = v1
}
@@ -535,7 +514,7 @@ func divisibleByPower5(m uint64, k int) bool {
// divmod1e9 computes quotient and remainder of division by 1e9,
// avoiding runtime uint64 division on 32-bit platforms.
func divmod1e9(x uint64) (uint32, uint32) {
- if !host32bit {
+ if host64bit {
return uint32(x / 1e9), uint32(x % 1e9)
}
// Use the same sequence of operations as the amd64 compiler.
diff --git a/src/strconv/itoa.go b/src/strconv/itoa.go
index 928b37ffa6..7884e8e987 100644
--- a/src/strconv/itoa.go
+++ b/src/strconv/itoa.go
@@ -4,16 +4,22 @@
package strconv
-import "math/bits"
-
-const fastSmalls = true // enable fast path for small integers
+import (
+ "math/bits"
+ "runtime"
+)
// FormatUint returns the string representation of i in the given base,
// for 2 <= base <= 36. The result uses the lower-case letters 'a' to 'z'
// for digit values >= 10.
func FormatUint(i uint64, base int) string {
- if fastSmalls && i < nSmalls && base == 10 {
- return small(int(i))
+ if base == 10 {
+ if i < nSmalls {
+ return small(int(i))
+ }
+ var a [24]byte
+ j := formatBase10(a[:], i)
+ return string(a[j:])
}
_, s := formatBits(nil, i, base, false, false)
return s
@@ -23,8 +29,21 @@ func FormatUint(i uint64, base int) string {
// for 2 <= base <= 36. The result uses the lower-case letters 'a' to 'z'
// for digit values >= 10.
func FormatInt(i int64, base int) string {
- if fastSmalls && 0 <= i && i < nSmalls && base == 10 {
- return small(int(i))
+ if base == 10 {
+ if 0 <= i && i < nSmalls {
+ return small(int(i))
+ }
+ var a [24]byte
+ u := uint64(i)
+ if i < 0 {
+ u = -u
+ }
+ j := formatBase10(a[:], u)
+ if i < 0 {
+ j--
+ a[j] = '-'
+ }
+ return string(a[j:])
}
_, s := formatBits(nil, uint64(i), base, i < 0, false)
return s
@@ -38,46 +57,29 @@ func Itoa(i int) string {
// AppendInt appends the string form of the integer i,
// as generated by [FormatInt], to dst and returns the extended buffer.
func AppendInt(dst []byte, i int64, base int) []byte {
- if fastSmalls && 0 <= i && i < nSmalls && base == 10 {
- return append(dst, small(int(i))...)
+ u := uint64(i)
+ if i < 0 {
+ dst = append(dst, '-')
+ u = -u
}
- dst, _ = formatBits(dst, uint64(i), base, i < 0, true)
- return dst
+ return AppendUint(dst, u, base)
}
// AppendUint appends the string form of the unsigned integer i,
// as generated by [FormatUint], to dst and returns the extended buffer.
func AppendUint(dst []byte, i uint64, base int) []byte {
- if fastSmalls && i < nSmalls && base == 10 {
- return append(dst, small(int(i))...)
+ if base == 10 {
+ if i < nSmalls {
+ return append(dst, small(int(i))...)
+ }
+ var a [24]byte
+ j := formatBase10(a[:], i)
+ return append(dst, a[j:]...)
}
dst, _ = formatBits(dst, i, base, false, true)
return dst
}
-// small returns the string for an i with 0 <= i < nSmalls.
-func small(i int) string {
- if i < 10 {
- return digits[i : i+1]
- }
- return smallsString[i*2 : i*2+2]
-}
-
-const nSmalls = 100
-
-const smallsString = "00010203040506070809" +
- "10111213141516171819" +
- "20212223242526272829" +
- "30313233343536373839" +
- "40414243444546474849" +
- "50515253545556575859" +
- "60616263646566676869" +
- "70717273747576777879" +
- "80818283848586878889" +
- "90919293949596979899"
-
-const host32bit = ^uint(0)>>32 == 0
-
const digits = "0123456789abcdefghijklmnopqrstuvwxyz"
// formatBits computes the string representation of u in the given base.
@@ -85,15 +87,15 @@ const digits = "0123456789abcdefghijklmnopqrstuvwxyz"
// set, the string is appended to dst and the resulting byte slice is
// returned as the first result value; otherwise the string is returned
// as the second result value.
+// The caller is expected to have handled base 10 separately for speed.
func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s string) {
- if base < 2 || base > len(digits) {
+ if base < 2 || base == 10 || base > len(digits) {
panic("strconv: illegal AppendInt/FormatInt base")
}
// 2 <= base && base <= len(digits)
var a [64 + 1]byte // +1 for sign of 64bit value in base 2
i := len(a)
-
if neg {
u = -u
}
@@ -101,56 +103,7 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s
// convert bits
// We use uint values where we can because those will
// fit into a single register even on a 32bit machine.
- if base == 10 {
- // common case: use constants for / because
- // the compiler can optimize it into a multiply+shift
-
- if host32bit {
- // convert the lower digits using 32bit operations
- for u >= 1e9 {
- // Avoid using r = a%b in addition to q = a/b
- // since 64bit division and modulo operations
- // are calculated by runtime functions on 32bit machines.
- q := u / 1e9
- us := uint(u - q*1e9) // u % 1e9 fits into a uint
- for j := 4; j > 0; j-- {
- is := us % 100 * 2
- us /= 100
- i -= 2
- a[i+1] = smallsString[is+1]
- a[i+0] = smallsString[is+0]
- }
-
- // us < 10, since it contains the last digit
- // from the initial 9-digit us.
- i--
- a[i] = smallsString[us*2+1]
-
- u = q
- }
- // u < 1e9
- }
-
- // u guaranteed to fit into a uint
- us := uint(u)
- for us >= 100 {
- is := us % 100 * 2
- us /= 100
- i -= 2
- a[i+1] = smallsString[is+1]
- a[i+0] = smallsString[is+0]
- }
-
- // us < 100
- is := us * 2
- i--
- a[i] = smallsString[is+1]
- if us >= 10 {
- i--
- a[i] = smallsString[is]
- }
-
- } else if isPowerOfTwo(base) {
+ if isPowerOfTwo(base) {
// Use shifts and masks instead of / and %.
shift := uint(bits.TrailingZeros(uint(base)))
b := uint64(base)
@@ -197,3 +150,159 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s
func isPowerOfTwo(x int) bool {
return x&(x-1) == 0
}
+
+const nSmalls = 100
+
+// smalls is the formatting of 00..99 concatenated.
+// It is then padded out with 56 x's to 256 bytes,
+// so that smalls[x&0xFF] has no bounds check.
+//
+// TODO(rsc): Once the compiler does a better job
+// at tracking mod bounds, the &0xFF should not be needed:
+// go.dev/issue/75954 and go.dev/issue/63110.
+const smalls = "00010203040506070809" +
+ "10111213141516171819" +
+ "20212223242526272829" +
+ "30313233343536373839" +
+ "40414243444546474849" +
+ "50515253545556575859" +
+ "60616263646566676869" +
+ "70717273747576777879" +
+ "80818283848586878889" +
+ "90919293949596979899" +
+ "xxxxxxxxxxxxxxxxxxxx" +
+ "xxxxxxxxxxxxxxxxxxxx" +
+ "xxxxxxxxxxxxxxxxxxxx" +
+ "xxxxxxxxxxxxxxxxxxxx" +
+ "xxxxxxxxxxxxxxxxxxxx" +
+ "xxxxxx"
+
+const host64bit = ^uint(0)>>32 != 0
+
+// small returns the string for an i with 0 <= i < nSmalls.
+func small(i int) string {
+ if i < 10 {
+ return digits[i : i+1]
+ }
+ return smalls[i*2 : i*2+2]
+}
+
+// formatBase10 formats the decimal representation of u into the tail of a
+// and returns the offset of the first byte written to a. That is, after
+//
+// i := formatBase10(a, u)
+//
+// the decimal representation is in a[i:].
+func formatBase10(a []byte, u uint64) int {
+ // Decide implementation strategy based on architecture.
+ const (
+ // 64-bit systems can work in 64-bit math the whole time
+ // or can split the uint64 into uint32-sized chunks.
+ // On most systems, the uint32 math is faster, but not all.
+ // The decision here is based on benchmarking.
+ itoaPure64 = host64bit && runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" && runtime.GOARCH != "s390x"
+
+ // 64-bit systems can all use 64-bit div and mod by a constant,
+ // which the compiler rewrites to use 64x64→128-bit multiplies.
+ itoaDivMod64 = host64bit // can use 64-bit div/mod by constant
+ )
+
+ if itoaPure64 {
+ // Convert 2 digits at a time, using 64-bit math.
+ i := len(a)
+ u := uint(u)
+ for u >= 100 {
+ var dd uint
+ u, dd = u/100, (u%100)*2
+ i -= 2
+ a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF]
+ }
+
+ dd := u * 2
+ i--
+ a[i] = smalls[(dd+1)&0xFF]
+ if u >= 10 {
+ i--
+ a[i] = smalls[(dd+0)&0xFF]
+ }
+ return i
+ }
+
+ // Convert 9-digit chunks using 32-bit math.
+ // Most numbers are small, so the comparison u >= 1e9 is usually pure overhead,
+ // so we approximate it by u>>29 != 0, which is usually faster and good enough.
+ i := len(a)
+ for (host64bit && u>>29 != 0) || (!host64bit && (u>>32 != 0 || uint32(u)>>29 != 0)) {
+ var lo uint32
+ if itoaDivMod64 {
+ u, lo = u/1e9, uint32(u%1e9)
+ } else {
+ // On 64-bit systems the compiler rewrites the div and mod above
+ // into a 64x64→128-bit multiply (https://godbolt.org/z/EPnK8zvMK):
+ // hi, _ := bits.Mul64(u>>1, 0x89705f4136b4a598)
+ // q := hi >> 28
+ // lo = uint32(u - q*1e9)
+ // u = q
+ // On 32-bit systems, the compiler invokes a uint64 software divide,
+ // which is quite slow. We could write the bits.Mul64 code above
+ // but even that is slower than we'd like, since it calls a software mul64
+ // instead of having a hardware instruction to use.
+ // Instead we inline bits.Mul64 here and change y0/y1 to constants.
+ // The compiler does use direct 32x32→64-bit multiplies for this code.
+ //
+ // For lots more about division by multiplication see Warren, _Hacker's Delight_.
+ // For a concise overview, see the first two sections of
+ // https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html.
+ const mask32 = 1<<32 - 1
+ x0 := ((u >> 1) & mask32)
+ x1 := (u >> 1) >> 32
+ const y0 = 0x36b4a598
+ const y1 = 0x89705f41
+ w0 := x0 * y0
+ t := x1*y0 + w0>>32
+ w1 := t & mask32
+ w2 := t >> 32
+ w1 += x0 * y1
+ hi := x1*y1 + w2 + w1>>32
+ q := hi >> 28
+
+ lo = uint32(u) - uint32(q)*1e9 // uint32(u - q*1e9) but faster
+ u = q
+ }
+
+ // Convert 9 digits.
+ for range 4 {
+ var dd uint32
+ lo, dd = lo/100, (lo%100)*2
+ i -= 2
+ a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF]
+ }
+ i--
+ a[i] = smalls[(lo*2+1)&0xFF]
+
+ // If we'd been using u >= 1e9 then we would be guaranteed that u/1e9 > 0,
+ // but since we used u>>29 != 0, u/1e9 might be 0, so we might be done.
+ // (If u is now 0, then at the start we had 2²⁹ ≤ u < 10⁹, so it was still correct
+ // to write 9 digits; we have not accidentally written any leading zeros.)
+ if u == 0 {
+ return i
+ }
+ }
+
+ // Convert final chunk, at most 8 digits.
+ lo := uint32(u)
+ for lo >= 100 {
+ var dd uint32
+ lo, dd = lo/100, (lo%100)*2
+ i -= 2
+ a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF]
+ }
+ i--
+ dd := lo * 2
+ a[i] = smalls[(dd+1)&0xFF]
+ if lo >= 10 {
+ i--
+ a[i] = smalls[(dd+0)&0xFF]
+ }
+ return i
+}