mirror of https://github.com/golang/go.git
runtime: use hardware divider to improve performance
The hardware divider is an optional component of ARMv7. This patch detects whether it is available in runtime and use it or not. 1. The hardware divider is detected at startup and a flag is set/clear according to a perticular bit of runtime.hwcap. 2. Each call of runtime.udiv will check this flag and decide if use the hardware division instruction. A rough test shows the performance improves 40-50% for ARMv7. And the compatibility of ARMv5/v6 is not broken. fixes #19118 Change-Id: Ic586bc9659ebc169553ca2004d2bdb721df823ac Reviewed-on: https://go-review.googlesource.com/37496 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Cherry Zhang <cherryyz@google.com>
This commit is contained in:
parent
2a8d99e427
commit
69261ecad6
|
|
@ -12,6 +12,7 @@
|
|||
// int8_t DidInitRun();
|
||||
// int8_t DidMainRun();
|
||||
// int32_t FromPkg();
|
||||
// uint32_t Divu(uint32_t, uint32_t);
|
||||
int main(void) {
|
||||
int8_t ran_init = DidInitRun();
|
||||
if (!ran_init) {
|
||||
|
|
@ -30,6 +31,11 @@ int main(void) {
|
|||
fprintf(stderr, "ERROR: FromPkg=%d, want %d\n", from_pkg, 1024);
|
||||
return 1;
|
||||
}
|
||||
uint32_t divu = Divu(2264, 31);
|
||||
if (divu != 73) {
|
||||
fprintf(stderr, "ERROR: Divu(2264, 31)=%d, want %d\n", divu, 73);
|
||||
return 1;
|
||||
}
|
||||
// test.bash looks for "PASS" to ensure this program has reached the end.
|
||||
printf("PASS\n");
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -8,3 +8,5 @@ import "C"
|
|||
|
||||
//export FromPkg
|
||||
func FromPkg() int32 { return 1024 }
|
||||
//export Divu
|
||||
func Divu(a, b uint32) uint32 { return a / b }
|
||||
|
|
|
|||
|
|
@ -400,6 +400,12 @@ func TestTrivialExecutablePIE(t *testing.T) {
|
|||
AssertHasRPath(t, "./trivial.pie", gorootInstallDir)
|
||||
}
|
||||
|
||||
// Build a division test program and check it runs.
|
||||
func TestDivisionExecutable(t *testing.T) {
|
||||
goCmd(t, "install", "-linkshared", "division")
|
||||
run(t, "division executable", "./bin/division")
|
||||
}
|
||||
|
||||
// Build an executable that uses cgo linked against the shared runtime and check it
|
||||
// runs.
|
||||
func TestCgoExecutable(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
//go:noinline
|
||||
func div(x, y uint32) uint32 {
|
||||
return x / y
|
||||
}
|
||||
|
||||
func main() {
|
||||
a := div(97, 11)
|
||||
if a != 8 {
|
||||
panic("FAIL")
|
||||
}
|
||||
}
|
||||
|
|
@ -965,6 +965,13 @@ jmp_label_3:
|
|||
REVSH R1, R2 // b12fffe6
|
||||
RBIT R1, R2 // 312fffe6
|
||||
|
||||
// DIVHW R0, R1, R2: R1 / R0 -> R2
|
||||
DIVHW R0, R1, R2 // 11f012e7
|
||||
DIVUHW R0, R1, R2 // 11f032e7
|
||||
// DIVHW R0, R1: R1 / R0 -> R1
|
||||
DIVHW R0, R1 // 11f011e7
|
||||
DIVUHW R0, R1 // 11f031e7
|
||||
|
||||
//
|
||||
// END
|
||||
//
|
||||
|
|
|
|||
|
|
@ -247,6 +247,8 @@ const (
|
|||
ADIV
|
||||
AMOD
|
||||
AMODU
|
||||
ADIVHW
|
||||
ADIVUHW
|
||||
|
||||
AMOVB
|
||||
AMOVBS
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ var Anames = []string{
|
|||
"DIV",
|
||||
"MOD",
|
||||
"MODU",
|
||||
"DIVHW",
|
||||
"DIVUHW",
|
||||
"MOVB",
|
||||
"MOVBS",
|
||||
"MOVBU",
|
||||
|
|
|
|||
|
|
@ -142,6 +142,8 @@ var optab = []Optab{
|
|||
{AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0},
|
||||
{ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0},
|
||||
{ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0},
|
||||
{ADIVHW, C_REG, C_REG, C_REG, 105, 4, 0, 0, 0},
|
||||
{ADIVHW, C_REG, C_NONE, C_REG, 105, 4, 0, 0, 0},
|
||||
{AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0},
|
||||
{AMULA, C_REG, C_REG, C_REGREG2, 17, 4, 0, 0, 0},
|
||||
{AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
|
||||
|
|
@ -1401,6 +1403,9 @@ func buildop(ctxt *obj.Link) {
|
|||
opset(AMODU, r0)
|
||||
opset(ADIVU, r0)
|
||||
|
||||
case ADIVHW:
|
||||
opset(ADIVUHW, r0)
|
||||
|
||||
case AMOVW,
|
||||
AMOVB,
|
||||
AMOVBS,
|
||||
|
|
@ -2407,6 +2412,16 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
if p.As == ADATABUNDLE {
|
||||
o1 = 0xe125be70
|
||||
}
|
||||
|
||||
case 105: /* divhw r,[r,]r */
|
||||
o1 = c.oprrr(p, p.As, int(p.Scond))
|
||||
rf := int(p.From.Reg)
|
||||
rt := int(p.To.Reg)
|
||||
r := int(p.Reg)
|
||||
if r == 0 {
|
||||
r = rt
|
||||
}
|
||||
o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16
|
||||
}
|
||||
|
||||
out[0] = o1
|
||||
|
|
@ -2445,6 +2460,10 @@ func (c *ctxt5) oprrr(p *obj.Prog, a obj.As, sc int) uint32 {
|
|||
c.ctxt.Diag(".nil/.W on dp instruction")
|
||||
}
|
||||
switch a {
|
||||
case ADIVHW:
|
||||
return o | 0x71<<20 | 0xf<<12 | 0x1<<4
|
||||
case ADIVUHW:
|
||||
return o | 0x73<<20 | 0xf<<12 | 0x1<<4
|
||||
case AMMUL:
|
||||
return o | 0x75<<20 | 0xf<<12 | 0x1<<4
|
||||
case AMULS:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package runtime
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
// TODO(minux): FP checks like in os_linux_arm.go.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package runtime
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
// TODO(minux): FP checks like in os_linux_arm.go.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,11 +11,13 @@ const (
|
|||
|
||||
_HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11
|
||||
_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
|
||||
_HWCAP_IDIVA = 1 << 17
|
||||
)
|
||||
|
||||
var randomNumber uint32
|
||||
var armArch uint8 = 6 // we default to ARMv6
|
||||
var hwcap uint32 // set by setup_auxv
|
||||
var hardDiv bool // set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
// On Android, /proc/self/auxv might be unreadable and hwcap won't
|
||||
|
|
@ -53,6 +55,7 @@ func archauxv(tag, val uintptr) {
|
|||
|
||||
case _AT_HWCAP: // CPU capability bit flags
|
||||
hwcap = uint32(val)
|
||||
hardDiv = (hwcap & _HWCAP_IDIVA) != 0
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package runtime
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
// TODO(minux): FP checks like in os_linux_arm.go.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ package runtime
|
|||
|
||||
import "unsafe"
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
|
||||
// Machine dependent mcontext initialisation for LWP.
|
||||
mc.__gregs[_REG_R15] = uint32(funcPC(lwp_tramp))
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package runtime
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
// TODO(minux): FP checks like in os_linux_arm.go.
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
|
||||
package runtime
|
||||
|
||||
var hardDiv bool // TODO: set if a hardware divider is available
|
||||
|
||||
func checkgoarm() {
|
||||
return // TODO(minux)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,6 +119,10 @@ TEXT runtime·_sfloatpanic(SB),NOSPLIT,$-4
|
|||
|
||||
// Be careful: Ra == R11 will be used by the linker for synthesized instructions.
|
||||
TEXT udiv(SB),NOSPLIT,$-4
|
||||
MOVBU runtime·hardDiv(SB), Ra
|
||||
CMP $0, Ra
|
||||
BNE udiv_hardware
|
||||
|
||||
CLZ Rq, Rs // find normalizing shift
|
||||
MOVW.S Rq<<Rs, Ra
|
||||
MOVW $fast_udiv_tab<>-64(SB), RM
|
||||
|
|
@ -154,6 +158,14 @@ TEXT udiv(SB),NOSPLIT,$-4
|
|||
ADD.PL $2, Rq
|
||||
RET
|
||||
|
||||
// use hardware divider
|
||||
udiv_hardware:
|
||||
DIVUHW Rq, Rr, Rs
|
||||
MUL Rs, Rq, RM
|
||||
RSB Rr, RM, Rr
|
||||
MOVW Rs, Rq
|
||||
RET
|
||||
|
||||
udiv_by_large_d:
|
||||
// at this point we know d>=2^(31-6)=2^25
|
||||
SUB $4, Ra, Ra
|
||||
|
|
|
|||
Loading…
Reference in New Issue