mirror of https://github.com/golang/go.git
runtime: guard VZEROUPPER on CPU feature
In CL 219131 we inserted a VZEROUPPER instruction on darwin/amd64. The instruction is not available on pre-AVX machines. Guard it with a CPU feature check.

Fixes #37459.

Change-Id: I9a064df277d091be4ee594eda5c7fd8ee323102b
Reviewed-on: https://go-review.googlesource.com/c/go/+/221057
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent 089e482b3d
commit c46ffdd2ec
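The change itself is small: the generated preemption prologue now tests internal/cpu's HasAVX flag before executing VZEROUPPER. For code outside the runtime, the same guard pattern looks roughly like the sketch below, which assumes the exported golang.org/x/sys/cpu package (not part of this CL; the printed messages are illustrative):

package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func main() {
	// Only take an AVX code path (which may end with VZEROUPPER) when the
	// CPU actually implements AVX; on a pre-AVX machine the instruction
	// would fault, which is the crash #37459 reports.
	if cpu.X86.HasAVX {
		fmt.Println("AVX available: AVX instructions such as VZEROUPPER are safe")
	} else {
		fmt.Println("pre-AVX CPU: stay on the SSE-only path")
	}
}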
src/runtime/cpuflags.go

@@ -11,6 +11,7 @@ import (
 // Offsets into internal/cpu records for use in assembly.
 const (
+	offsetX86HasAVX  = unsafe.Offsetof(cpu.X86.HasAVX)
 	offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
 	offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
 	offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
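These constants give the assembly a named byte offset for each feature flag; the build emits them into go_asm.h as const_offsetX86HasAVX and friends, so the .s files can address a single field of the internal/cpu records. A minimal, self-contained sketch of the same unsafe.Offsetof idiom (the struct and constant names here are illustrative, not the runtime's):

package main

import (
	"fmt"
	"unsafe"
)

// Illustrative stand-in for internal/cpu's x86 feature record.
type x86Features struct {
	HasAVX  bool
	HasAVX2 bool
	HasERMS bool
}

// Compile-time byte offset of HasAVX2 inside the record, the same idiom
// cpuflags.go uses so that assembly can write something like
//   CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
const offsetHasAVX2 = unsafe.Offsetof(x86Features{}.HasAVX2)

func main() {
	fmt.Println(offsetHasAVX2) // 1: HasAVX2 is the second one-byte field
}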
src/runtime/mkpreempt.go

@@ -244,15 +244,6 @@ func genAMD64() {
 	// TODO: MXCSR register?
 
-	// Apparently, the signal handling code path in darwin kernel leaves
-	// the upper bits of Y registers in a dirty state, which causes
-	// many SSE operations (128-bit and narrower) become much slower.
-	// Clear the upper bits to get to a clean state. See issue #37174.
-	// It is safe here as Go code don't use the upper bits of Y registers.
-	p("#ifdef GOOS_darwin")
-	p("VZEROUPPER")
-	p("#endif")
-
 	p("PUSHQ BP")
 	p("MOVQ SP, BP")
 	p("// Save flags before clobbering them")
@@ -261,6 +252,18 @@ func genAMD64() {
 	p("ADJSP $%d", l.stack)
 	p("// But vet doesn't know ADJSP, so suppress vet stack checking")
 	p("NOP SP")
+
+	// Apparently, the signal handling code path in darwin kernel leaves
+	// the upper bits of Y registers in a dirty state, which causes
+	// many SSE operations (128-bit and narrower) become much slower.
+	// Clear the upper bits to get to a clean state. See issue #37174.
+	// It is safe here as Go code don't use the upper bits of Y registers.
+	p("#ifdef GOOS_darwin")
+	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
+	p("JE 2(PC)")
+	p("VZEROUPPER")
+	p("#endif")
+
 	l.save()
 	p("CALL ·asyncPreempt2(SB)")
 	l.restore()
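mkpreempt.go builds the generated assembly one line at a time through a printf-style helper, so the guarded VZEROUPPER above is just five more p() calls. A simplified sketch of that emitter style (writing to stdout instead of the real preempt_amd64.s; only the output lines are taken from the diff):

package main

import "fmt"

// p emits one tab-indented assembly line, mirroring the helper style the
// generator uses (the real one writes into an output buffer, not stdout).
func p(format string, args ...interface{}) {
	fmt.Printf("\t"+format+"\n", args...)
}

func main() {
	p("#ifdef GOOS_darwin")
	p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
	p("JE 2(PC)")
	p("VZEROUPPER")
	p("#endif")
	p("ADJSP $%d", 368) // formatted arguments work like fmt.Printf
}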
src/runtime/preempt_amd64.s

@@ -4,9 +4,6 @@
 #include "textflag.h"
 
 TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
-	#ifdef GOOS_darwin
-	VZEROUPPER
-	#endif
 	PUSHQ BP
 	MOVQ SP, BP
 	// Save flags before clobbering them
@@ -15,6 +12,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
 	ADJSP $368
 	// But vet doesn't know ADJSP, so suppress vet stack checking
 	NOP SP
+	#ifdef GOOS_darwin
+	CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
+	JE 2(PC)
+	VZEROUPPER
+	#endif
 	MOVQ AX, 0(SP)
 	MOVQ CX, 8(SP)
 	MOVQ DX, 16(SP)
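In Go assembly, 2(PC) addresses the instruction two slots ahead, so JE 2(PC) skips exactly one instruction: VZEROUPPER now runs only when the HasAVX byte is non-zero. A hedged Go rendering of that control flow (the vzeroupper stub below is purely illustrative; the real thing is a single CPU instruction, not a call):

package main

import "fmt"

// Illustrative stub standing in for the VZEROUPPER instruction.
func vzeroupper() { fmt.Println("VZEROUPPER executed") }

func main() {
	hasAVX := false // on a pre-AVX machine the feature byte is 0
	// CMPB ...HasAVX(SB), $0 followed by JE 2(PC) behaves like this if:
	// when the byte is zero, the jump skips the next instruction.
	if hasAVX {
		vzeroupper()
	}
	fmt.Println("no illegal-instruction fault on pre-AVX hardware")
}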