mirror of https://github.com/golang/go.git
math: optimize ceil/floor functions on amd64
Use SSE 4.1 rounding instruction to perform rounding Results (haswell): name old time/op new time/op delta Floor-48 2.71ns ± 0% 1.87ns ± 1% -31.17% (p=0.000 n=16+19) Ceil-48 3.09ns ± 3% 2.16ns ± 0% -30.16% (p=0.000 n=19+12) Change-Id: If63715879eed6530b1eb4fc96132d827f8f43909 Reviewed-on: https://go-review.googlesource.com/14561 Reviewed-by: Klaus Post <klauspost@gmail.com> Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
acc90c53e8
commit
37cfb2e07e
|
|
@ -734,6 +734,11 @@ const (
|
|||
AAESIMC
|
||||
AAESKEYGENASSIST
|
||||
|
||||
AROUNDPS
|
||||
AROUNDSS
|
||||
AROUNDPD
|
||||
AROUNDSD
|
||||
|
||||
APSHUFD
|
||||
APCLMULQDQ
|
||||
|
||||
|
|
|
|||
|
|
@ -677,6 +677,10 @@ var Anames = []string{
|
|||
"AESDECLAST",
|
||||
"AESIMC",
|
||||
"AESKEYGENASSIST",
|
||||
"ROUNDPS",
|
||||
"ROUNDSS",
|
||||
"ROUNDPD",
|
||||
"ROUNDSD",
|
||||
"PSHUFD",
|
||||
"PCLMULQDQ",
|
||||
"JCXZW",
|
||||
|
|
|
|||
|
|
@ -1474,6 +1474,10 @@ var optab =
|
|||
{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
|
||||
{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
|
||||
{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
|
||||
{AROUNDPD, yaes2, Pq, [23]uint8{0x3a, 0x09, 0}},
|
||||
{AROUNDPS, yaes2, Pq, [23]uint8{0x3a, 0x08, 0}},
|
||||
{AROUNDSD, yaes2, Pq, [23]uint8{0x3a, 0x0b, 0}},
|
||||
{AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
|
||||
{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
|
||||
{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
|
||||
{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
|
||||
|
|
|
|||
|
|
@ -6,8 +6,25 @@
|
|||
|
||||
#define Big 0x4330000000000000 // 2**52
|
||||
|
||||
// func hasSSE4() bool
|
||||
// returns whether SSE4.1 is supported
|
||||
TEXT ·hasSSE4(SB),NOSPLIT,$0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $19, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
// func Floor(x float64) float64
|
||||
TEXT ·Floor(SB),NOSPLIT,$0
|
||||
CMPB math·useSSE4(SB), $1
|
||||
JNE nosse4
|
||||
ROUNDSD $1, x+0(FP), X0
|
||||
MOVQ X0, ret+8(FP)
|
||||
RET
|
||||
nosse4:
|
||||
MOVQ x+0(FP), AX
|
||||
MOVQ $~(1<<63), DX // sign bit mask
|
||||
ANDQ AX,DX // DX = |x|
|
||||
|
|
@ -30,6 +47,12 @@ isBig_floor:
|
|||
|
||||
// func Ceil(x float64) float64
|
||||
TEXT ·Ceil(SB),NOSPLIT,$0
|
||||
CMPB math·useSSE4(SB), $1
|
||||
JNE nosse4
|
||||
ROUNDSD $2, x+0(FP), X0
|
||||
MOVQ X0, ret+8(FP)
|
||||
RET
|
||||
nosse4:
|
||||
MOVQ x+0(FP), AX
|
||||
MOVQ $~(1<<63), DX // sign bit mask
|
||||
MOVQ AX, BX // BX = copy of x
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build amd64 amd64p32
|
||||
|
||||
package math
|
||||
|
||||
//defined in floor_amd64.s
|
||||
func hasSSE4() bool
|
||||
|
||||
var useSSE4 = hasSSE4()
|
||||
Loading…
Reference in New Issue