aboutsummaryrefslogtreecommitdiff
path: root/src/math
diff options
context:
space:
mode:
authorRick Hudson <rlh@golang.org>2016-04-27 18:19:16 -0400
committerRick Hudson <rlh@golang.org>2016-04-27 18:46:52 -0400
commit23aeb34df172b17b7bfaa85fb59ca64bef9073bb (patch)
treea8ab866f1e50f0059856ce628f036d93ab620155 /src/math
parent1354b32cd70f2702381764fd595dd2faa996840c (diff)
parentd3c79d324acd7300b6f705e66af8ca711af00d9f (diff)
downloadgo-23aeb34df172b17b7bfaa85fb59ca64bef9073bb.tar.xz
[dev.garbage] Merge remote-tracking branch 'origin/master' into HEAD
Change-Id: I282fd9ce9db435dfd35e882a9502ab1abc185297
Diffstat (limited to 'src/math')
-rw-r--r--src/math/big/arith_s390x.s565
-rw-r--r--src/math/big/float.go4
-rw-r--r--src/math/big/floatmarsh.go89
-rw-r--r--src/math/big/floatmarsh_test.go82
-rw-r--r--src/math/big/gcd_test.go16
-rw-r--r--src/math/big/int.go4
-rw-r--r--src/math/big/nat.go29
-rw-r--r--src/math/big/natconv.go2
-rw-r--r--src/math/big/ratconv.go2
-rw-r--r--src/math/dim_s390x.s132
-rw-r--r--src/math/sqrt_s390x.s12
-rw-r--r--src/math/stubs_s390x.s77
12 files changed, 1003 insertions, 11 deletions
diff --git a/src/math/big/arith_s390x.s b/src/math/big/arith_s390x.s
new file mode 100644
index 0000000000..a691970810
--- /dev/null
+++ b/src/math/big/arith_s390x.s
@@ -0,0 +1,565 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,s390x
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·mulWW(SB),NOSPLIT,$0 // func mulWW(x, y Word) (z1, z0 Word)
+ MOVD x+0(FP), R3
+ MOVD y+8(FP), R4
+ MULHDU R3, R4 // NOTE(review): the stores below read R10/R11, not R4 - presumably MULHDU leaves the full product in that pair; confirm against the assembler
+ MOVD R10, z1+16(FP) // z1 = R10 (presumably the high product word - TODO confirm)
+ MOVD R11, z0+24(FP) // z0 = R11 (presumably the low product word - TODO confirm)
+ RET
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB),NOSPLIT,$0
+ MOVD x1+0(FP), R10 // x1 (presumably the high dividend word) goes in R10
+ MOVD x0+8(FP), R11 // x0 (presumably the low dividend word) goes in R11
+ MOVD y+16(FP), R5 // divisor
+ WORD $0xb98700a5 // dlgr r10,r5 (hand-encoded instruction; works on the R10/R11 values loaded above)
+ MOVD R11, q+24(FP) // quotient is read back from R11
+ MOVD R10, r+32(FP) // remainder is read back from R10
+ RET
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// func addVV(z, x, y []Word) (c Word)
+TEXT ·addVV(SB),NOSPLIT,$0 // z[i] = x[i] + y[i] with the carry chained across words; c is the final carry
+ MOVD z_len+8(FP), R3 // n = len(z)
+ MOVD x+24(FP), R8 // R8 = &x[0]
+ MOVD y+48(FP), R9 // R9 = &y[0]
+ MOVD z+0(FP), R2 // R2 = &z[0]
+
+ MOVD $0, R4 // c = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0 (kept as a byte offset)
+
+ // replace BLT with BR below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v1 // if n < 0 goto v1
+
+U1: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ ADDC R4, R4 // restore CF: R4 is 0 or -1 here, so R4+R4 carries exactly when a carry was saved
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD 8(R9)(R10*1), R11
+ ADDE R11, R6
+ MOVD 16(R9)(R10*1), R11
+ ADDE R11, R7
+ MOVD 24(R9)(R10*1), R11
+ ADDE R11, R1
+ MOVD R0, R4 // R4 = 0
+ ADDE R4, R4 // save CF: R4 = 0 + 0 + carry
+ NEG R4, R4 // keep it as 0 or -1 so the next ADDC R4, R4 regenerates the carry
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+
+ ADD $32, R10 // i += 4 words (32 bytes)
+ SUB $4, R3 // n -= 4
+ BGE U1 // if n >= 0 goto U1
+
+v1: ADD $4, R3 // n += 4
+ BLE E1 // if n <= 0 goto E1
+
+L1: // n > 0
+ ADDC R4, R4 // restore CF
+ MOVD 0(R8)(R10*1), R5
+ MOVD 0(R9)(R10*1), R11
+ ADDE R11, R5
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R0, R4 // R4 = 0
+ ADDE R4, R4 // save CF
+ NEG R4, R4 // saved carry is held as 0 or -1
+
+ ADD $8, R10 // i++ (8 bytes)
+ SUB $1, R3 // n--
+ BGT L1 // if n > 0 goto L1
+
+E1: NEG R4, R4 // turn the saved 0/-1 back into c = 0/1
+ MOVD R4, c+72(FP) // return c
+ RET
+
+// DI = R3, CX = R4, SI = r10, r8 = r8, r9=r9, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0) + use R11
+// func subVV(z, x, y []Word) (c Word)
+// (same structure as addVV, using SUBC/SUBE instead of ADDC/ADDE; the local label names are identical)
+TEXT ·subVV(SB),NOSPLIT,$0 // z[i] = x[i] - y[i] with the borrow chained across words; c is the final borrow
+ MOVD z_len+8(FP), R3 // n = len(z)
+ MOVD x+24(FP), R8 // R8 = &x[0]
+ MOVD y+48(FP), R9 // R9 = &y[0]
+ MOVD z+0(FP), R2 // R2 = &z[0]
+
+ MOVD $0, R4 // c = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R10 // i = 0 (kept as a byte offset)
+
+ // replace BLT with BR below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v1 // if n < 0 goto v1
+
+U1: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ MOVD R0, R11 // R11 = 0
+ SUBC R4, R11 // restore CF: 0 - R4 borrows exactly when the saved value is -1
+ MOVD 0(R9)(R10*1), R11
+ SUBE R11, R5
+ MOVD 8(R9)(R10*1), R11
+ SUBE R11, R6
+ MOVD 16(R9)(R10*1), R11
+ SUBE R11, R7
+ MOVD 24(R9)(R10*1), R11
+ SUBE R11, R1
+ MOVD R0, R4 // R4 = 0
+ SUBE R4, R4 // save CF: R4 = 0 (no borrow) or -1 (borrow)
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+
+ ADD $32, R10 // i += 4 words (32 bytes)
+ SUB $4, R3 // n -= 4
+ BGE U1 // if n >= 0 goto U1
+
+v1: ADD $4, R3 // n += 4
+ BLE E1 // if n <= 0 goto E1
+
+L1: // n > 0
+ MOVD R0, R11 // R11 = 0
+ SUBC R4, R11 // restore CF
+ MOVD 0(R8)(R10*1), R5
+ MOVD 0(R9)(R10*1), R11
+ SUBE R11, R5
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R0, R4 // R4 = 0
+ SUBE R4, R4 // save CF
+
+ ADD $8, R10 // i++ (8 bytes)
+ SUB $1, R3 // n--
+ BGT L1 // if n > 0 goto L1
+
+E1: NEG R4, R4 // turn the saved 0/-1 back into c = 0/1
+ MOVD R4, c+72(FP) // return c
+ RET
+
+
+// func addVW(z, x []Word, y Word) (c Word)
+TEXT ·addVW(SB),NOSPLIT,$0 // z = x + y, where y is a single word added at index 0 and carried upward; c is the final carry
+//DI = R3, CX = R4, SI = r10, r8 = r8, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0)
+ MOVD z_len+8(FP), R3 // n = len(z)
+ MOVD x+24(FP), R8 // R8 = &x[0]
+ MOVD y+48(FP), R4 // c = y
+ MOVD z+0(FP), R2 // R2 = &z[0]
+ MOVD $0, R0 // make sure it's 0
+ MOVD $0, R10 // i = 0 (kept as a byte offset)
+
+ // replace BLT with BR below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v4 // if n < 0 goto v4
+
+U4: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ ADDC R4, R5 // x[i] + c, starts the carry chain
+ ADDE R0, R6 // remaining words only absorb the carry (R0 is 0)
+ ADDE R0, R7
+ ADDE R0, R1
+ ADDE R0, R0 // R0 = 0 + 0 + carry
+ MOVD R0, R4 // save CF
+ SUB R0, R0 // put R0 back to 0
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+ ADD $32, R10 // i += 4 -> i +=32
+ SUB $4, R3 // n -= 4
+ BGE U4 // if n >= 0 goto U4
+
+v4: ADD $4, R3 // n += 4
+ BLE E4 // if n <= 0 goto E4
+
+L4: // n > 0
+ MOVD 0(R8)(R10*1), R5
+ ADDC R4, R5 // x[i] + c
+ ADDE R0, R0 // R0 = carry
+ MOVD R0, R4 // save CF
+ SUB R0, R0 // put R0 back to 0
+ MOVD R5, 0(R2)(R10*1)
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L4 // if n > 0 goto L4
+
+E4: MOVD R4, c+56(FP) // return c
+
+ RET
+
+//DI = R3, CX = R4, SI = r10, r8 = r8, r10 = r2 , r11 = r5, r12 = r6, r13 = r7, r14 = r1 (R0 set to 0)
+// func subVW(z, x []Word, y Word) (c Word)
+// (same structure as addVW, using SUBC/SUBE instead of ADDC/ADDE; the local label names are identical)
+TEXT ·subVW(SB),NOSPLIT,$0 // z = x - y, where y is a single word subtracted at index 0 and borrowed upward; c is the final borrow
+ MOVD z_len+8(FP), R3 // n = len(z)
+ MOVD x+24(FP), R8 // R8 = &x[0]
+ MOVD y+48(FP), R4 // c = y
+ MOVD z+0(FP), R2 // R2 = &z[0]
+ MOVD $0, R0 // make sure it's 0
+ MOVD $0, R10 // i = 0 (kept as a byte offset)
+
+ // replace BLT with BR below to disable the unrolled loop
+ SUB $4, R3 // n -= 4
+ BLT v4 // if n < 0 goto v4
+
+U4: // n >= 0
+ // regular loop body unrolled 4x
+ MOVD 0(R8)(R10*1), R5
+ MOVD 8(R8)(R10*1), R6
+ MOVD 16(R8)(R10*1), R7
+ MOVD 24(R8)(R10*1), R1
+ SUBC R4, R5 //SLGR -> SUBC
+ SUBE R0, R6 //SLBGR -> SUBE
+ SUBE R0, R7
+ SUBE R0, R1
+ SUBE R4, R4 // save CF: R4 = 0 (no borrow) or -1 (borrow)
+ NEG R4, R4 // c = 0 or 1 for the next round
+ MOVD R5, 0(R2)(R10*1)
+ MOVD R6, 8(R2)(R10*1)
+ MOVD R7, 16(R2)(R10*1)
+ MOVD R1, 24(R2)(R10*1)
+
+ ADD $32, R10 // i += 4 -> i +=32
+ SUB $4, R3 // n -= 4
+ BGE U4 // if n >= 0 goto U4
+
+v4: ADD $4, R3 // n += 4
+ BLE E4 // if n <= 0 goto E4
+
+L4: // n > 0
+ MOVD 0(R8)(R10*1), R5
+ SUBC R4, R5 // x[i] - c
+ SUBE R4, R4 // save CF
+ NEG R4, R4 // c = 0 or 1
+ MOVD R5, 0(R2)(R10*1)
+
+ ADD $8, R10 // i++
+ SUB $1, R3 // n--
+ BGT L4 // if n > 0 goto L4
+
+E4: MOVD R4, c+56(FP) // return c
+
+ RET
+
+// func shlVU(z, x []Word, s uint) (c Word)
+TEXT ·shlVU(SB),NOSPLIT,$0 // z = x << s, walking from the top word down; c receives the bits shifted out of x[n-1]
+ MOVD z_len+8(FP), R5
+ SUB $1, R5 // n--
+ BLT X8b // n < 0 (n <= 0)
+
+ // n > 0
+ MOVD s+48(FP), R4
+ CMPBEQ R0, R4, Z80 //handle 0 case beq (NOTE(review): R0 is compared as zero but is never cleared in this function - confirm it is guaranteed 0 on entry)
+ MOVD $64, R6
+ CMPBEQ R6, R4, Z864 //handle 64 case beq
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+ SUB R4, R6, R7 // ŝ = 64 - s
+ MOVD (R8)(R5*1), R10 // w1 = x[n-1]
+ SRD R7, R10, R3 // c = w1>>ŝ
+ MOVD R3, c+56(FP)
+
+ MOVD $0, R1 // i = 0 (R1 is not read again on this path)
+ BR E8
+
+ // i > 0
+L8: MOVD R10, R3 // w = w1
+ MOVD -8(R8)(R5*1), R10 // w1 = x[i-1]
+
+ SLD R4, R3 // w<<s | w1>>ŝ
+ SRD R7, R10, R6
+ OR R6, R3
+ MOVD R3, (R2)(R5*1) // z[i] = w<<s | w1>>ŝ
+ SUB $8, R5 // i--
+
+E8: CMPBGT R5, R0, L8 // i > 0
+
+ // i <= 0
+X8a: SLD R4, R10 // w1<<s
+ MOVD R10, (R2) // z[0] = w1<<s
+ RET
+
+X8b: MOVD R0, c+56(FP) // empty vector: c = R0 (see the note on R0 above)
+ RET
+
+Z80: MOVD z+0(FP), R2 // s == 0: plain copy of x into z, c = 0
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+
+ MOVD (R8), R10
+ MOVD $0, R3
+ MOVD R3, c+56(FP)
+
+ MOVD $0, R1 // i = 0
+ BR E8Z
+
+ // i < n-1
+L8Z: MOVD R10, R3
+ MOVD 8(R8)(R1*1), R10
+
+ MOVD R3, (R2)(R1*1)
+ ADD $8, R1 // NOTE(review): this copy ascends while the main loop descends - confirm callers never pass z overlapping x at a higher address when s == 0
+
+E8Z: CMPBLT R1, R5, L8Z
+
+ // i >= n-1
+ MOVD R10, (R2)(R5*1)
+ RET
+
+Z864: MOVD z+0(FP), R2 // s == 64: every word moves up one position
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+ MOVD (R8)(R5*1), R3 // w1 = x[n-1]
+ MOVD R3, c+56(FP) // c = x[n-1]
+
+ BR E864
+
+ // i > 0
+L864: MOVD -8(R8)(R5*1), R3 // w = x[i-1]
+
+ MOVD R3, (R2)(R5*1) // z[i] = x[i-1]
+ SUB $8, R5 // i--
+
+E864: CMPBGT R5, R0, L864 // i > 0
+
+ MOVD R0, (R2) // z[0] = 0
+ RET
+
+
+// CX = R4, r8 = r8, r10 = r2 , r11 = r5, DX = r3, AX = r10 , BX = R1 , 64-count = r7 (R0 set to 0) temp = R6
+// func shrVU(z, x []Word, s uint) (c Word)
+TEXT ·shrVU(SB),NOSPLIT,$0 // z = x >> s, walking from word 0 up; c receives the bits shifted out of x[0]
+ MOVD z_len+8(FP), R5
+ SUB $1, R5 // n--
+ BLT X9b // n < 0 (n <= 0)
+
+ // n > 0
+ MOVD s+48(FP), R4
+ CMPBEQ R0, R4, ZB0 //handle 0 case beq (NOTE(review): R0 is compared as zero but is never cleared in this function - confirm it is guaranteed 0 on entry)
+ MOVD $64, R6
+ CMPBEQ R6, R4, ZB64 //handle 64 case beq
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+ SUB R4, R6, R7 // ŝ = 64 - s
+ MOVD (R8), R10 // w1 = x[0]
+ SLD R7, R10, R3 // c = w1<<ŝ
+ MOVD R3, c+56(FP)
+
+ MOVD $0, R1 // i = 0
+ BR E9
+
+ // i < n-1
+L9: MOVD R10, R3 // w = w1
+ MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
+
+ SRD R4, R3 // w>>s | w1<<ŝ
+ SLD R7, R10, R6
+ OR R6, R3
+ MOVD R3, (R2)(R1*1) // z[i] = w>>s | w1<<ŝ
+ ADD $8, R1 // i++
+
+E9: CMPBLT R1, R5, L9 // i < n-1
+
+ // i >= n-1
+X9a: SRD R4, R10 // w1>>s
+ MOVD R10, (R2)(R5*1) // z[n-1] = w1>>s
+ RET
+
+X9b: MOVD R0, c+56(FP) // empty vector: c = R0 (see the note on R0 above)
+ RET
+
+ZB0: MOVD z+0(FP), R2 // s == 0: plain copy of x into z, c = 0
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+
+ MOVD (R8), R10 // w1 = x[0]
+ MOVD $0, R3 // c = 0 when s == 0
+ MOVD R3, c+56(FP)
+
+ MOVD $0, R1 // i = 0
+ BR E9Z
+
+ // i < n-1
+L9Z: MOVD R10, R3 // w = w1
+ MOVD 8(R8)(R1*1), R10 // w1 = x[i+1]
+
+ MOVD R3, (R2)(R1*1) // z[i] = w (no shift)
+ ADD $8, R1 // i++
+
+E9Z: CMPBLT R1, R5, L9Z // i < n-1
+
+ // i >= n-1
+ MOVD R10, (R2)(R5*1) // z[n-1] = w1
+ RET
+
+ZB64: MOVD z+0(FP), R2 // s == 64: every word moves down one position
+ MOVD x+24(FP), R8
+ SLD $3, R5 // R5 = (n-1)*8, byte offset of the last word
+ MOVD (R8), R3 // w1 = x[0]
+ MOVD R3, c+56(FP) // c = x[0]
+
+ MOVD $0, R1 // i = 0
+ BR E964
+
+ // i < n-1
+L964: MOVD 8(R8)(R1*1), R3 // w1 = x[i+1]
+
+ MOVD R3, (R2)(R1*1) // z[i] = x[i+1]
+ ADD $8, R1 // i++
+
+E964: CMPBLT R1, R5, L964 // i < n-1
+
+ // i >= n-1
+ MOVD $0, R10 // top word becomes 0
+ MOVD R10, (R2)(R5*1) // z[n-1] = 0
+ RET
+
+// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, DX = r3, AX = r6 , BX = R1 , (R0 set to 0) + use R11 + use R7 for i
+// func mulAddVWW(z, x []Word, y, r Word) (c Word)
+TEXT ·mulAddVWW(SB),NOSPLIT,$0 // z = x*y + r, one word at a time; c is the final carry word
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD r+56(FP), R4 // c = r
+ MOVD z_len+8(FP), R5 // n = len(z)
+ MOVD $0, R1 // i*8 = 0
+ MOVD $0, R7 // i = 0
+ MOVD $0, R0 // make sure it's zero
+ BR E5
+
+L5: MOVD (R8)(R1*1), R6 // x[i]
+ MULHDU R9, R6 // NOTE(review): R11 is read next as the low product word although it is not named here - presumably MULHDU leaves it there; confirm
+ ADDC R4, R11 //add to low order bits
+ ADDE R0, R6 // fold the carry into the high word
+ MOVD R11, (R2)(R1*1) // z[i] = low word
+ MOVD R6, R4 // c = high word
+ ADD $8, R1 // i*8 + 8
+ ADD $1, R7 // i++
+
+E5: CMPBLT R7, R5, L5 // i < n
+
+ MOVD R4, c+64(FP)
+ RET
+
+// func addMulVVW(z, x []Word, y Word) (c Word)
+// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1 , (R0 set to 0) + use R11 + use R7 for i
+TEXT ·addMulVVW(SB),NOSPLIT,$0 // z += x*y, one word at a time; c is the final carry word
+ MOVD z+0(FP), R2
+ MOVD x+24(FP), R8
+ MOVD y+48(FP), R9
+ MOVD z_len+8(FP), R5 // n = len(z)
+
+ MOVD $0, R1 // i*8 = 0
+ MOVD $0, R7 // i = 0
+ MOVD $0, R0 // make sure it's zero
+ MOVD $0, R4 // c = 0
+
+ MOVD R5, R12
+ AND $-2, R12 // R12 = n rounded down to an even count
+ CMPBGE R5, $2, A6 // at least two words: take the 2x unrolled loop
+ BR E6
+
+A6: MOVD (R8)(R1*1), R6 // x[i]
+ MULHDU R9, R6 // NOTE(review): R11 is read next as the low product word - presumably MULHDU leaves it there; confirm
+ MOVD (R2)(R1*1), R10 // z[i]
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11 // add the incoming carry word
+ ADDE R0, R6
+ MOVD R6, R4 // c = high word
+ MOVD R11, (R2)(R1*1) // z[i] = low word
+
+ MOVD (8)(R8)(R1*1), R6 // x[i+1]
+ MULHDU R9, R6
+ MOVD (8)(R2)(R1*1), R10 // z[i+1]
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11
+ ADDE R0, R6
+ MOVD R6, R4
+ MOVD R11, (8)(R2)(R1*1)
+
+ ADD $16, R1 // i*8 + 16
+ ADD $2, R7 // i += 2
+
+ CMPBLT R7, R12, A6 // more pairs left
+ BR E6
+
+L6: MOVD (R8)(R1*1), R6
+ MULHDU R9, R6
+ MOVD (R2)(R1*1), R10
+ ADDC R10, R11 //add to low order bits
+ ADDE R0, R6
+ ADDC R4, R11
+ ADDE R0, R6
+ MOVD R6, R4
+ MOVD R11, (R2)(R1*1)
+
+ ADD $8, R1 // i*8 + 8
+ ADD $1, R7 // i++
+
+E6: CMPBLT R7, R5, L6 // i < n
+
+ MOVD R4, c+56(FP)
+ RET
+
+// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
+// CX = R4, r8 = r8, r9=r9, r10 = r2 , r11 = r5, AX = r11, DX = R6, r12=r12, BX = R1(*8) , (R0 set to 0) + use R11 + use R7 for i
+TEXT ·divWVW(SB),NOSPLIT,$0 // walks x from the top word down, dividing by y and feeding each remainder into the next step
+ MOVD z+0(FP), R2
+ MOVD xn+24(FP), R10 // r = xn
+ MOVD x+32(FP), R8
+ MOVD y+56(FP), R9
+ MOVD z_len+8(FP), R7 // i = len(z)
+ SLD $3, R7, R1 // i*8
+ MOVD $0, R0 // make sure it's zero
+ BR E7
+
+L7: MOVD (R8)(R1*1), R11 // R11 = x[i]; R10 still holds the running remainder
+ WORD $0xB98700A9 //DLGR R10,R9
+ MOVD R11, (R2)(R1*1) // z[i] = R11 (quotient word)
+
+E7: SUB $1, R7 // i--
+ SUB $8, R1
+ BGE L7 // i >= 0
+
+ MOVD R10, r+64(FP) // final remainder
+ RET
+
+// func bitLen(x Word) (n int)
+TEXT ·bitLen(SB),NOSPLIT,$0
+ MOVD x+0(FP), R2
+ WORD $0xb9830022 // FLOGR R2,R2 (hand-encoded; presumably leaves the count of leading zero bits in R2 - confirm)
+ MOVD $64, R3
+ SUB R2, R3 // n = 64 - R2
+ MOVD R3, n+8(FP)
+ RET
diff --git a/src/math/big/float.go b/src/math/big/float.go
index 4b8ad388d3..7a9c2b3dfb 100644
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -1008,9 +1008,9 @@ func (x *Float) Float64() (float64, Accuracy) {
if r.form == inf || e > emax {
// overflow
if x.neg {
- return float64(math.Inf(-1)), Below
+ return math.Inf(-1), Below
}
- return float64(math.Inf(+1)), Above
+ return math.Inf(+1), Above
}
// e <= emax
diff --git a/src/math/big/floatmarsh.go b/src/math/big/floatmarsh.go
index 44987ee03a..3725d4b834 100644
--- a/src/math/big/floatmarsh.go
+++ b/src/math/big/floatmarsh.go
@@ -6,7 +6,94 @@
package big
-import "fmt"
+import (
+ "encoding/binary"
+ "fmt"
+)
+
+// Gob codec version. Permits backward-compatible changes to the encoding.
+const floatGobVersion byte = 1
+
+// GobEncode implements the gob.GobEncoder interface.
+// The encoding carries the Float's value along with its
+// precision, rounding mode, and accuracy. A nil receiver
+// encodes as a nil slice.
+func (x *Float) GobEncode() ([]byte, error) {
+	if x == nil {
+		return nil, nil
+	}
+
+	// Work out how many bytes the encoding needs.
+	size := 1 + 1 + 4 // version + mode|acc|form|neg (3+2+2+1bit) + prec
+	words := 0        // mantissa words that will be written
+	isFinite := x.form == finite
+	if isFinite {
+		// Mantissa length, in words, implied by the precision. The stored
+		// mantissa may be shorter (trailing zero words were dropped) or
+		// longer (unused bits); never emit more words than are present,
+		// and never more than the precision calls for.
+		words = int((x.prec + (_W - 1)) / _W)
+		if have := len(x.mant); have < words {
+			words = have
+		}
+		size += 4 + words*_S // exp + mant
+	}
+
+	out := make([]byte, size)
+	out[0] = floatGobVersion
+
+	// Pack mode, accuracy (biased by one), form, and sign into one byte.
+	flags := byte(x.mode&7)<<5 | byte((x.acc+1)&3)<<3 | byte(x.form&3)<<1
+	if x.neg {
+		flags |= 1
+	}
+	out[1] = flags
+	binary.BigEndian.PutUint32(out[2:], x.prec)
+
+	if isFinite {
+		binary.BigEndian.PutUint32(out[6:], uint32(x.exp))
+		// Only the top `words` words of the mantissa are written.
+		x.mant[len(x.mant)-words:].bytes(out[10:])
+	}
+
+	return out, nil
+}
+
+// GobDecode implements the gob.GobDecoder interface.
+// The result is rounded per the precision and rounding mode of
+// z unless z's precision is 0, in which case z is set exactly
+// to the decoded value.
+func (z *Float) GobDecode(buf []byte) error {
+ if len(buf) == 0 {
+ // Other side sent a nil or default value.
+ *z = Float{}
+ return nil
+ }
+
+ if buf[0] != floatGobVersion {
+ return fmt.Errorf("Float.GobDecode: encoding version %d not supported", buf[0])
+ }
+
+ oldPrec := z.prec
+ oldMode := z.mode
+
+ b := buf[1]
+ z.mode = RoundingMode((b >> 5) & 7)
+ z.acc = Accuracy((b>>3)&3) - 1
+ z.form = form((b >> 1) & 3)
+ z.neg = b&1 != 0
+ z.prec = binary.BigEndian.Uint32(buf[2:])
+
+ if z.form == finite {
+ z.exp = int32(binary.BigEndian.Uint32(buf[6:]))
+ z.mant = z.mant.setBytes(buf[10:])
+ }
+
+ if oldPrec != 0 {
+ z.mode = oldMode
+ z.SetPrec(uint(oldPrec))
+ }
+
+ return nil
+}
// MarshalText implements the encoding.TextMarshaler interface.
// Only the Float value is marshaled (in full precision), other
diff --git a/src/math/big/floatmarsh_test.go b/src/math/big/floatmarsh_test.go
index d7ef2fca68..5bd906ddae 100644
--- a/src/math/big/floatmarsh_test.go
+++ b/src/math/big/floatmarsh_test.go
@@ -5,7 +5,10 @@
package big
import (
+ "bytes"
+ "encoding/gob"
"encoding/json"
+ "io"
"testing"
)
@@ -23,6 +26,85 @@ var floatVals = []string{
"Inf",
}
+func TestFloatGobEncoding(t *testing.T) { // round-trips each floatVals entry through gob for every sign, precision, and rounding mode listed below
+ var medium bytes.Buffer
+ enc := gob.NewEncoder(&medium) // one encoder/decoder pair over the shared buffer is reused for all cases
+ dec := gob.NewDecoder(&medium)
+ for _, test := range floatVals {
+ for _, sign := range []string{"", "+", "-"} {
+ for _, prec := range []uint{0, 1, 2, 10, 53, 64, 100, 1000} {
+ for _, mode := range []RoundingMode{ToNearestEven, ToNearestAway, ToZero, AwayFromZero, ToNegativeInf, ToPositiveInf} {
+ medium.Reset() // empty buffer for each test case (in case of failures)
+ x := sign + test
+
+ var tx Float
+ _, _, err := tx.SetPrec(prec).SetMode(mode).Parse(x, 0)
+ if err != nil {
+ t.Errorf("parsing of %s (%dbits, %v) failed (invalid test case): %v", x, prec, mode, err)
+ continue
+ }
+
+ // If tx was set to prec == 0, tx.Parse(x, 0) assumes precision 64. Correct it.
+ if prec == 0 {
+ tx.SetPrec(0)
+ }
+
+ if err := enc.Encode(&tx); err != nil {
+ t.Errorf("encoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err)
+ continue
+ }
+
+ var rx Float // fresh receiver, so the decoded precision and mode are taken from the stream
+ if err := dec.Decode(&rx); err != nil {
+ t.Errorf("decoding of %v (%dbits, %v) failed: %v", &tx, prec, mode, err)
+ continue
+ }
+
+ if rx.Cmp(&tx) != 0 { // the value must survive the round trip
+ t.Errorf("transmission of %s failed: got %s want %s", x, rx.String(), tx.String())
+ continue
+ }
+
+ if rx.Prec() != prec { // the attributes must survive as well
+ t.Errorf("transmission of %s's prec failed: got %d want %d", x, rx.Prec(), prec)
+ }
+
+ if rx.Mode() != mode {
+ t.Errorf("transmission of %s's mode failed: got %s want %s", x, rx.Mode(), mode)
+ }
+
+ if rx.Acc() != tx.Acc() {
+ t.Errorf("transmission of %s's accuracy failed: got %s want %s", x, rx.Acc(), tx.Acc())
+ }
+ }
+ }
+ }
+ }
+}
+
+// TestFloatCorruptGob checks that decoding a truncated or corrupted
+// gob stream of a Float reports an error.
+func TestFloatCorruptGob(t *testing.T) {
+	var buf bytes.Buffer
+	// 4.0 / 3 rather than 4 / 3: the latter is an integer constant
+	// expression and evaluates to 1.
+	tx := NewFloat(4.0 / 3).SetPrec(1000).SetMode(ToPositiveInf)
+	if err := gob.NewEncoder(&buf).Encode(tx); err != nil {
+		t.Fatal(err)
+	}
+	b := buf.Bytes()
+
+	// Sanity check: the intact stream decodes.
+	var rx Float
+	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err != nil {
+		t.Fatal(err)
+	}
+
+	// A stream cut off after 10 bytes must end in an unexpected EOF.
+	if err := gob.NewDecoder(bytes.NewReader(b[:10])).Decode(&rx); err != io.ErrUnexpectedEOF {
+		t.Errorf("got %v want %v", err, io.ErrUnexpectedEOF)
+	}
+
+	// Corrupt one byte near the start of the stream; decoding must fail.
+	b[1] = 0
+	if err := gob.NewDecoder(bytes.NewReader(b)).Decode(&rx); err == nil {
+		t.Fatal("got nil want version error")
+	}
+}
+
func TestFloatJSONEncoding(t *testing.T) {
for _, test := range floatVals {
for _, sign := range []string{"", "+", "-"} {
diff --git a/src/math/big/gcd_test.go b/src/math/big/gcd_test.go
index c0b9f58300..a929bf597f 100644
--- a/src/math/big/gcd_test.go
+++ b/src/math/big/gcd_test.go
@@ -20,13 +20,27 @@ func randInt(r *rand.Rand, size uint) *Int {
}
func runGCD(b *testing.B, aSize, bSize uint) {
+ b.Run("WithoutXY", func(b *testing.B) { // sub-benchmark with calcXY = false
+ runGCDExt(b, aSize, bSize, false)
+ })
+ b.Run("WithXY", func(b *testing.B) { // sub-benchmark with calcXY = true
+ runGCDExt(b, aSize, bSize, true)
+ })
+}
+
+func runGCDExt(b *testing.B, aSize, bSize uint, calcXY bool) { // times Int.GCD on two random operands; calcXY selects whether x and y are non-nil
b.StopTimer()
var r = rand.New(rand.NewSource(1234))
aa := randInt(r, aSize)
bb := randInt(r, bSize)
+ var x, y *Int // stay nil unless calcXY is set
+ if calcXY {
+ x = new(Int)
+ y = new(Int)
+ }
b.StartTimer()
for i := 0; i < b.N; i++ {
- new(Int).GCD(nil, nil, aa, bb)
+ new(Int).GCD(x, y, aa, bb)
}
}
diff --git a/src/math/big/int.go b/src/math/big/int.go
index 67ab7042ff..f2a75d1cd5 100644
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -459,11 +459,11 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
q := new(Int)
temp := new(Int)
+ r := new(Int)
for len(B.abs) > 0 {
- r := new(Int)
q, r = q.QuoRem(A, B, r)
- A, B = B, r
+ A, B, r = B, r, A
temp.Set(X)
X.Mul(X, q)
diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index 7668b6481b..2e65d2a7ef 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -8,7 +8,10 @@
package big
-import "math/rand"
+import (
+ "math/rand"
+ "sync"
+)
// An unsigned integer x of the form
//
@@ -539,6 +542,21 @@ func (z nat) div(z2, u, v nat) (q, r nat) {
return
}
+// getNat returns a nat of len n. The contents may not be zero.
+func getNat(n int) nat {
+ var z nat
+ if v := natPool.Get(); v != nil {
+ z = v.(nat)
+ }
+ return z.make(n)
+}
+
+func putNat(x nat) { // hands x to natPool so a later getNat can reuse it
+ natPool.Put(x)
+}
+
+var natPool sync.Pool // declared without a New func here; getNat handles a nil result from Get
+
// q = (uIn-r)/v, with 0 <= r < y
// Uses z as storage for q, and u as storage for r if possible.
// See Knuth, Volume 2, section 4.3.1, Algorithm D.
@@ -557,7 +575,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
}
q = z.make(m + 1)
- qhatv := make(nat, n+1)
+ qhatv := getNat(n + 1)
if alias(u, uIn) || alias(u, v) {
u = nil // u is an alias for uIn or v - cannot reuse
}
@@ -565,10 +583,11 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
u.clear() // TODO(gri) no need to clear if we allocated a new u
// D1.
+ var v1 nat
shift := nlz(v[n-1])
if shift > 0 {
// do not modify v, it may be used by another goroutine simultaneously
- v1 := make(nat, n)
+ v1 = getNat(n)
shlVU(v1, v, shift)
v = v1
}
@@ -609,6 +628,10 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
q[j] = qhat
}
+ if v1 != nil {
+ putNat(v1)
+ }
+ putNat(qhatv)
q = q.norm()
shrVU(u, u, shift)
diff --git a/src/math/big/natconv.go b/src/math/big/natconv.go
index d2ce667fb6..e216bd288c 100644
--- a/src/math/big/natconv.go
+++ b/src/math/big/natconv.go
@@ -302,7 +302,7 @@ func (x nat) itoa(neg bool, base int) []byte {
}
} else {
- bb, ndigits := maxPow(Word(b))
+ bb, ndigits := maxPow(b)
// construct table of successive squares of bb*leafSize to use in subdivisions
// result (table != nil) <=> (len(x) > leafSize > 0)
diff --git a/src/math/big/ratconv.go b/src/math/big/ratconv.go
index 57df124e88..7c127f8585 100644
--- a/src/math/big/ratconv.go
+++ b/src/math/big/ratconv.go
@@ -178,7 +178,7 @@ func scanExponent(r io.ByteScanner, binExpOk bool) (exp int64, base int, err err
}
break // i > 0
}
- digits = append(digits, byte(ch))
+ digits = append(digits, ch)
}
// i > 0 => we have at least one digit
diff --git a/src/math/dim_s390x.s b/src/math/dim_s390x.s
new file mode 100644
index 0000000000..503d2611f8
--- /dev/null
+++ b/src/math/dim_s390x.s
@@ -0,0 +1,132 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on dim_amd64.s
+
+#include "textflag.h"
+
+#define PosInf 0x7FF0000000000000
+#define NaN 0x7FF8000000000001
+#define NegInf 0xFFF0000000000000
+
+// func Dim(x, y float64) float64
+TEXT ·Dim(SB),NOSPLIT,$0
+ // (+Inf, +Inf) special case
+ MOVD x+0(FP), R2 // raw bits of x
+ MOVD y+8(FP), R3 // raw bits of y
+ MOVD $PosInf, R4
+ CMPUBNE R4, R2, dim2
+ CMPUBEQ R4, R3, bothInf
+dim2: // (-Inf, -Inf) special case
+ MOVD $NegInf, R4
+ CMPUBNE R4, R2, dim3
+ CMPUBEQ R4, R3, bothInf
+dim3: // (NaN, x) or (x, NaN)
+ MOVD $~(1<<63), R5 // mask that clears the sign bit
+ MOVD $PosInf, R4
+ AND R5, R2 // x = |x|
+ CMPUBLT R4, R2, isDimNaN // |x| bits above those of +Inf: x is NaN
+ AND R5, R3 // y = |y|
+ CMPUBLT R4, R3, isDimNaN // same test for y
+
+ FMOVD x+0(FP), F1
+ FMOVD y+8(FP), F2
+ FSUB F2, F1 // F1 = x - y
+ FMOVD $(0.0), F2
+ FCMPU F2, F1
+ BGE +3(PC) // 0 >= x-y: skip ahead and return 0
+ FMOVD F1, ret+16(FP) // return x - y
+ RET
+ FMOVD F2, ret+16(FP) // return 0
+ RET
+bothInf: // Dim(-Inf, -Inf) or Dim(+Inf, +Inf)
+isDimNaN:
+ MOVD $NaN, R4 // both cases return NaN
+ MOVD R4, ret+16(FP)
+ RET
+
+// func Max(x, y float64) float64
+TEXT ·Max(SB),NOSPLIT,$0
+ // +Inf special cases
+ MOVD $PosInf, R4
+ MOVD x+0(FP), R8 // raw bits of x
+ CMPUBEQ R4, R8, isPosInf
+ MOVD y+8(FP), R9 // raw bits of y
+ CMPUBEQ R4, R9, isPosInf
+ // NaN special cases
+ MOVD $~(1<<63), R5 // bit mask
+ MOVD $PosInf, R4
+ MOVD R8, R2
+ AND R5, R2 // x = |x|
+ CMPUBLT R4, R2, isMaxNaN // |x| bits above those of +Inf: x is NaN
+ MOVD R9, R3
+ AND R5, R3 // y = |y|
+ CMPUBLT R4, R3, isMaxNaN // same test for y
+ // ±0 special cases
+ OR R3, R2 // zero only when both |x| and |y| are zero
+ BEQ isMaxZero
+
+ FMOVD x+0(FP), F1
+ FMOVD y+8(FP), F2
+ FCMPU F2, F1
+ BGT +3(PC) // y > x: skip ahead and return y
+ FMOVD F1, ret+16(FP) // return x
+ RET
+ FMOVD F2, ret+16(FP) // return y
+ RET
+isMaxNaN: // return NaN
+ MOVD $NaN, R4
+isPosInf: // return +Inf
+ MOVD R4, ret+16(FP) // R4 holds +Inf, or NaN when entered from isMaxNaN
+ RET
+isMaxZero:
+ MOVD $(1<<63), R4 // -0.0
+ CMPUBEQ R4, R8, +3(PC) // x is -0: skip ahead and return y
+ MOVD R8, ret+16(FP) // return 0
+ RET
+ MOVD R9, ret+16(FP) // return other 0
+ RET
+
+// func Min(x, y float64) float64
+TEXT ·Min(SB),NOSPLIT,$0
+ // -Inf special cases
+ MOVD $NegInf, R4
+ MOVD x+0(FP), R8 // raw bits of x
+ CMPUBEQ R4, R8, isNegInf
+ MOVD y+8(FP), R9 // raw bits of y
+ CMPUBEQ R4, R9, isNegInf
+ // NaN special cases
+ MOVD $~(1<<63), R5 // mask that clears the sign bit
+ MOVD $PosInf, R4
+ MOVD R8, R2
+ AND R5, R2 // x = |x|
+ CMPUBLT R4, R2, isMinNaN // |x| bits above those of +Inf: x is NaN
+ MOVD R9, R3
+ AND R5, R3 // y = |y|
+ CMPUBLT R4, R3, isMinNaN // same test for y
+ // ±0 special cases
+ OR R3, R2 // zero only when both |x| and |y| are zero
+ BEQ isMinZero
+
+ FMOVD x+0(FP), F1
+ FMOVD y+8(FP), F2
+ FCMPU F2, F1
+ BLT +3(PC) // y < x: skip ahead and return y
+ FMOVD F1, ret+16(FP) // return x
+ RET
+ FMOVD F2, ret+16(FP) // return y
+ RET
+isMinNaN: // return NaN
+ MOVD $NaN, R4
+isNegInf: // return -Inf
+ MOVD R4, ret+16(FP) // R4 holds -Inf, or NaN when entered from isMinNaN
+ RET
+isMinZero:
+ MOVD $(1<<63), R4 // -0.0
+ CMPUBEQ R4, R8, +3(PC) // x is -0: skip ahead and return x
+ MOVD R9, ret+16(FP) // return other 0
+ RET
+ MOVD R8, ret+16(FP) // return -0
+ RET
+
diff --git a/src/math/sqrt_s390x.s b/src/math/sqrt_s390x.s
new file mode 100644
index 0000000000..37ca0bec91
--- /dev/null
+++ b/src/math/sqrt_s390x.s
@@ -0,0 +1,12 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func Sqrt(x float64) float64
+TEXT ·Sqrt(SB),NOSPLIT,$0
+ FMOVD x+0(FP), F1 // load the argument
+ FSQRT F1, F1 // F1 = sqrt(F1)
+ FMOVD F1, ret+8(FP) // store the result
+ RET
diff --git a/src/math/stubs_s390x.s b/src/math/stubs_s390x.s
new file mode 100644
index 0000000000..76868447cd
--- /dev/null
+++ b/src/math/stubs_s390x.s
@@ -0,0 +1,77 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "../runtime/textflag.h"
+
+TEXT ·Asin(SB),NOSPLIT,$0 // each stub below is a zero-frame entry point that branches to the lower-case symbol of the same name
+ BR ·asin(SB)
+
+TEXT ·Acos(SB),NOSPLIT,$0
+ BR ·acos(SB)
+
+TEXT ·Atan2(SB),NOSPLIT,$0
+ BR ·atan2(SB)
+
+TEXT ·Atan(SB),NOSPLIT,$0
+ BR ·atan(SB)
+
+TEXT ·Exp2(SB),NOSPLIT,$0
+ BR ·exp2(SB)
+
+TEXT ·Expm1(SB),NOSPLIT,$0
+ BR ·expm1(SB)
+
+TEXT ·Exp(SB),NOSPLIT,$0
+ BR ·exp(SB)
+
+TEXT ·Floor(SB),NOSPLIT,$0
+ BR ·floor(SB)
+
+TEXT ·Ceil(SB),NOSPLIT,$0
+ BR ·ceil(SB)
+
+TEXT ·Trunc(SB),NOSPLIT,$0
+ BR ·trunc(SB)
+
+TEXT ·Frexp(SB),NOSPLIT,$0
+ BR ·frexp(SB)
+
+TEXT ·Hypot(SB),NOSPLIT,$0
+ BR ·hypot(SB)
+
+TEXT ·Ldexp(SB),NOSPLIT,$0
+ BR ·ldexp(SB)
+
+TEXT ·Log10(SB),NOSPLIT,$0
+ BR ·log10(SB)
+
+TEXT ·Log2(SB),NOSPLIT,$0
+ BR ·log2(SB)
+
+TEXT ·Log1p(SB),NOSPLIT,$0
+ BR ·log1p(SB)
+
+TEXT ·Log(SB),NOSPLIT,$0
+ BR ·log(SB)
+
+TEXT ·Modf(SB),NOSPLIT,$0
+ BR ·modf(SB)
+
+TEXT ·Mod(SB),NOSPLIT,$0
+ BR ·mod(SB)
+
+TEXT ·Remainder(SB),NOSPLIT,$0
+ BR ·remainder(SB)
+
+TEXT ·Sincos(SB),NOSPLIT,$0
+ BR ·sincos(SB)
+
+TEXT ·Sin(SB),NOSPLIT,$0
+ BR ·sin(SB)
+
+TEXT ·Cos(SB),NOSPLIT,$0
+ BR ·cos(SB)
+
+TEXT ·Tan(SB),NOSPLIT,$0
+ BR ·tan(SB)