diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/strconv/ftoa.go | 4 | ||||
| -rw-r--r-- | src/strconv/ftoaryu.go | 31 | ||||
| -rw-r--r-- | src/strconv/itoa.go | 285 |
3 files changed, 204 insertions, 116 deletions
diff --git a/src/strconv/ftoa.go b/src/strconv/ftoa.go index acbcb9fcc9..1d4bf770be 100644 --- a/src/strconv/ftoa.go +++ b/src/strconv/ftoa.go @@ -483,7 +483,7 @@ func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte { } // mantissa - dst, _ = formatBits(dst, mant, 10, false, true) + dst = AppendUint(dst, mant, 10) // p dst = append(dst, 'p') @@ -493,7 +493,7 @@ func fmtB(dst []byte, neg bool, mant uint64, exp int, flt *floatInfo) []byte { if exp >= 0 { dst = append(dst, '+') } - dst, _ = formatBits(dst, uint64(exp), 10, exp < 0, true) + dst = AppendInt(dst, int64(exp), 10) return dst } diff --git a/src/strconv/ftoaryu.go b/src/strconv/ftoaryu.go index c1aaa250f3..9349df955f 100644 --- a/src/strconv/ftoaryu.go +++ b/src/strconv/ftoaryu.go @@ -192,30 +192,9 @@ func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int m /= 10 trimmed++ } - // render digits (similar to formatBits) - n := uint(prec) + // render digits + formatBase10(d.d[:prec], m) d.nd = prec - v := m - for v >= 100 { - var v1, v2 uint64 - if v>>32 == 0 { - v1, v2 = uint64(uint32(v)/100), uint64(uint32(v)%100) - } else { - v1, v2 = v/100, v%100 - } - n -= 2 - d.d[n+1] = smallsString[2*v2+1] - d.d[n+0] = smallsString[2*v2+0] - v = v1 - } - if v > 0 { - n-- - d.d[n] = smallsString[2*v+1] - } - if v >= 10 { - n-- - d.d[n] = smallsString[2*v] - } for d.d[d.nd-1] == '0' { d.nd-- trimmed++ @@ -448,8 +427,8 @@ func ryuDigits32(d *decimalSlice, lower, central, upper uint32, n := endindex for n > d.nd { v1, v2 := v/100, v%100 - d.d[n] = smallsString[2*v2+1] - d.d[n-1] = smallsString[2*v2+0] + d.d[n] = smalls[2*v2+1] + d.d[n-1] = smalls[2*v2+0] n -= 2 v = v1 } @@ -535,7 +514,7 @@ func divisibleByPower5(m uint64, k int) bool { // divmod1e9 computes quotient and remainder of division by 1e9, // avoiding runtime uint64 division on 32-bit platforms. func divmod1e9(x uint64) (uint32, uint32) { - if !host32bit { + if host64bit { return uint32(x / 1e9), uint32(x % 1e9) } // Use the same sequence of operations as the amd64 compiler. diff --git a/src/strconv/itoa.go b/src/strconv/itoa.go index 928b37ffa6..7884e8e987 100644 --- a/src/strconv/itoa.go +++ b/src/strconv/itoa.go @@ -4,16 +4,22 @@ package strconv -import "math/bits" - -const fastSmalls = true // enable fast path for small integers +import ( + "math/bits" + "runtime" +) // FormatUint returns the string representation of i in the given base, // for 2 <= base <= 36. The result uses the lower-case letters 'a' to 'z' // for digit values >= 10. func FormatUint(i uint64, base int) string { - if fastSmalls && i < nSmalls && base == 10 { - return small(int(i)) + if base == 10 { + if i < nSmalls { + return small(int(i)) + } + var a [24]byte + j := formatBase10(a[:], i) + return string(a[j:]) } _, s := formatBits(nil, i, base, false, false) return s @@ -23,8 +29,21 @@ func FormatUint(i uint64, base int) string { // for 2 <= base <= 36. The result uses the lower-case letters 'a' to 'z' // for digit values >= 10. func FormatInt(i int64, base int) string { - if fastSmalls && 0 <= i && i < nSmalls && base == 10 { - return small(int(i)) + if base == 10 { + if 0 <= i && i < nSmalls { + return small(int(i)) + } + var a [24]byte + u := uint64(i) + if i < 0 { + u = -u + } + j := formatBase10(a[:], u) + if i < 0 { + j-- + a[j] = '-' + } + return string(a[j:]) } _, s := formatBits(nil, uint64(i), base, i < 0, false) return s @@ -38,46 +57,29 @@ func Itoa(i int) string { // AppendInt appends the string form of the integer i, // as generated by [FormatInt], to dst and returns the extended buffer. func AppendInt(dst []byte, i int64, base int) []byte { - if fastSmalls && 0 <= i && i < nSmalls && base == 10 { - return append(dst, small(int(i))...) + u := uint64(i) + if i < 0 { + dst = append(dst, '-') + u = -u } - dst, _ = formatBits(dst, uint64(i), base, i < 0, true) - return dst + return AppendUint(dst, u, base) } // AppendUint appends the string form of the unsigned integer i, // as generated by [FormatUint], to dst and returns the extended buffer. func AppendUint(dst []byte, i uint64, base int) []byte { - if fastSmalls && i < nSmalls && base == 10 { - return append(dst, small(int(i))...) + if base == 10 { + if i < nSmalls { + return append(dst, small(int(i))...) + } + var a [24]byte + j := formatBase10(a[:], i) + return append(dst, a[j:]...) } dst, _ = formatBits(dst, i, base, false, true) return dst } -// small returns the string for an i with 0 <= i < nSmalls. -func small(i int) string { - if i < 10 { - return digits[i : i+1] - } - return smallsString[i*2 : i*2+2] -} - -const nSmalls = 100 - -const smallsString = "00010203040506070809" + - "10111213141516171819" + - "20212223242526272829" + - "30313233343536373839" + - "40414243444546474849" + - "50515253545556575859" + - "60616263646566676869" + - "70717273747576777879" + - "80818283848586878889" + - "90919293949596979899" - -const host32bit = ^uint(0)>>32 == 0 - const digits = "0123456789abcdefghijklmnopqrstuvwxyz" // formatBits computes the string representation of u in the given base. @@ -85,15 +87,15 @@ const digits = "0123456789abcdefghijklmnopqrstuvwxyz" // set, the string is appended to dst and the resulting byte slice is // returned as the first result value; otherwise the string is returned // as the second result value. +// The caller is expected to have handled base 10 separately for speed. func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s string) { - if base < 2 || base > len(digits) { + if base < 2 || base == 10 || base > len(digits) { panic("strconv: illegal AppendInt/FormatInt base") } // 2 <= base && base <= len(digits) var a [64 + 1]byte // +1 for sign of 64bit value in base 2 i := len(a) - if neg { u = -u } @@ -101,56 +103,7 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s // convert bits // We use uint values where we can because those will // fit into a single register even on a 32bit machine. - if base == 10 { - // common case: use constants for / because - // the compiler can optimize it into a multiply+shift - - if host32bit { - // convert the lower digits using 32bit operations - for u >= 1e9 { - // Avoid using r = a%b in addition to q = a/b - // since 64bit division and modulo operations - // are calculated by runtime functions on 32bit machines. - q := u / 1e9 - us := uint(u - q*1e9) // u % 1e9 fits into a uint - for j := 4; j > 0; j-- { - is := us % 100 * 2 - us /= 100 - i -= 2 - a[i+1] = smallsString[is+1] - a[i+0] = smallsString[is+0] - } - - // us < 10, since it contains the last digit - // from the initial 9-digit us. - i-- - a[i] = smallsString[us*2+1] - - u = q - } - // u < 1e9 - } - - // u guaranteed to fit into a uint - us := uint(u) - for us >= 100 { - is := us % 100 * 2 - us /= 100 - i -= 2 - a[i+1] = smallsString[is+1] - a[i+0] = smallsString[is+0] - } - - // us < 100 - is := us * 2 - i-- - a[i] = smallsString[is+1] - if us >= 10 { - i-- - a[i] = smallsString[is] - } - - } else if isPowerOfTwo(base) { + if isPowerOfTwo(base) { // Use shifts and masks instead of / and %. shift := uint(bits.TrailingZeros(uint(base))) b := uint64(base) @@ -197,3 +150,159 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s func isPowerOfTwo(x int) bool { return x&(x-1) == 0 } + +const nSmalls = 100 + +// smalls is the formatting of 00..99 concatenated. +// It is then padded out with 56 x's to 256 bytes, +// so that smalls[x&0xFF] has no bounds check. +// +// TODO(rsc): Once the compiler does a better job +// at tracking mod bounds, the &0xFF should not be needed: +// go.dev/issue/75954 and go.dev/issue/63110. +const smalls = "00010203040506070809" + + "10111213141516171819" + + "20212223242526272829" + + "30313233343536373839" + + "40414243444546474849" + + "50515253545556575859" + + "60616263646566676869" + + "70717273747576777879" + + "80818283848586878889" + + "90919293949596979899" + + "xxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxx" + + "xxxxxxxxxxxxxxxxxxxx" + + "xxxxxx" + +const host64bit = ^uint(0)>>32 != 0 + +// small returns the string for an i with 0 <= i < nSmalls. +func small(i int) string { + if i < 10 { + return digits[i : i+1] + } + return smalls[i*2 : i*2+2] +} + +// formatBase10 formats the decimal representation of u into the tail of a +// and returns the offset of the first byte written to a. That is, after +// +// i := formatBase10(a, u) +// +// the decimal representation is in a[i:]. +func formatBase10(a []byte, u uint64) int { + // Decide implementation strategy based on architecture. + const ( + // 64-bit systems can work in 64-bit math the whole time + // or can split the uint64 into uint32-sized chunks. + // On most systems, the uint32 math is faster, but not all. + // The decision here is based on benchmarking. + itoaPure64 = host64bit && runtime.GOARCH != "amd64" && runtime.GOARCH != "arm64" && runtime.GOARCH != "s390x" + + // 64-bit systems can all use 64-bit div and mod by a constant, + // which the compiler rewrites to use 64x64→128-bit multiplies. + itoaDivMod64 = host64bit // can use 64-bit div/mod by constant + ) + + if itoaPure64 { + // Convert 2 digits at a time, using 64-bit math. + i := len(a) + u := uint(u) + for u >= 100 { + var dd uint + u, dd = u/100, (u%100)*2 + i -= 2 + a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF] + } + + dd := u * 2 + i-- + a[i] = smalls[(dd+1)&0xFF] + if u >= 10 { + i-- + a[i] = smalls[(dd+0)&0xFF] + } + return i + } + + // Convert 9-digit chunks using 32-bit math. + // Most numbers are small, so the comparison u >= 1e9 is usually pure overhead, + // so we approximate it by u>>29 != 0, which is usually faster and good enough. + i := len(a) + for (host64bit && u>>29 != 0) || (!host64bit && (u>>32 != 0 || uint32(u)>>29 != 0)) { + var lo uint32 + if itoaDivMod64 { + u, lo = u/1e9, uint32(u%1e9) + } else { + // On 64-bit systems the compiler rewrites the div and mod above + // into a 64x64→128-bit multiply (https://godbolt.org/z/EPnK8zvMK): + // hi, _ := bits.Mul64(u>>1, 0x89705f4136b4a598) + // q := hi >> 28 + // lo = uint32(u - q*1e9) + // u = q + // On 32-bit systems, the compiler invokes a uint64 software divide, + // which is quite slow. We could write the bits.Mul64 code above + // but even that is slower than we'd like, since it calls a software mul64 + // instead of having a hardware instruction to use. + // Instead we inline bits.Mul64 here and change y0/y1 to constants. + // The compiler does use direct 32x32→64-bit multiplies for this code. + // + // For lots more about division by multiplication see Warren, _Hacker's Delight_. + // For a concise overview, see the first two sections of + // https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html. + const mask32 = 1<<32 - 1 + x0 := ((u >> 1) & mask32) + x1 := (u >> 1) >> 32 + const y0 = 0x36b4a598 + const y1 = 0x89705f41 + w0 := x0 * y0 + t := x1*y0 + w0>>32 + w1 := t & mask32 + w2 := t >> 32 + w1 += x0 * y1 + hi := x1*y1 + w2 + w1>>32 + q := hi >> 28 + + lo = uint32(u) - uint32(q)*1e9 // uint32(u - q*1e9) but faster + u = q + } + + // Convert 9 digits. + for range 4 { + var dd uint32 + lo, dd = lo/100, (lo%100)*2 + i -= 2 + a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF] + } + i-- + a[i] = smalls[(lo*2+1)&0xFF] + + // If we'd been using u >= 1e9 then we would be guaranteed that u/1e9 > 0, + // but since we used u>>29 != 0, u/1e9 might be 0, so we might be done. + // (If u is now 0, then at the start we had 2²⁹ ≤ u < 10⁹, so it was still correct + // to write 9 digits; we have not accidentally written any leading zeros.) + if u == 0 { + return i + } + } + + // Convert final chunk, at most 8 digits. + lo := uint32(u) + for lo >= 100 { + var dd uint32 + lo, dd = lo/100, (lo%100)*2 + i -= 2 + a[i+0], a[i+1] = smalls[(dd+0)&0xFF], smalls[(dd+1)&0xFF] + } + i-- + dd := lo * 2 + a[i] = smalls[(dd+1)&0xFF] + if lo >= 10 { + i-- + a[i] = smalls[(dd+0)&0xFF] + } + return i +} |
