mirror of https://github.com/golang/go.git
runtime: use MOVSB instead of MOVSQ for unaligned moves
MOVSB is quite a bit faster for unaligned moves. Possibly we should use MOVSB all of the time, but Intel folks say it might be a bit faster to use MOVSQ on some processors (but not any I have access to at the moment).

benchmark                              old ns/op     new ns/op     delta
BenchmarkMemmove4096-8                 93.9          93.2          -0.75%
BenchmarkMemmoveUnalignedDst4096-8     256           151           -41.02%
BenchmarkMemmoveUnalignedSrc4096-8     175           90.5          -48.29%

Fixes #14630

Change-Id: I568e6d6590eb3615e6a699fb474020596be665ff
Reviewed-on: https://go-review.googlesource.com/20293
Reviewed-by: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
parent
b07a214d39
commit
6a33f7765f
|
|
@ -69,13 +69,25 @@ nosse2:
|
|||
/*
|
||||
* forward copy loop
|
||||
*/
|
||||
forward:
|
||||
forward:
|
||||
// Check alignment
|
||||
MOVL SI, AX
|
||||
ORL DI, AX
|
||||
TESTL $3, AX
|
||||
JNE unaligned_fwd
|
||||
|
||||
MOVL BX, CX
|
||||
SHRL $2, CX
|
||||
ANDL $3, BX
|
||||
|
||||
REP; MOVSL
|
||||
JMP tail
|
||||
|
||||
unaligned_fwd:
|
||||
MOVL BX, CX
|
||||
REP; MOVSB
|
||||
RET
|
||||
|
||||
/*
|
||||
* check overlap
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -77,12 +77,25 @@ forward:
|
|||
CMPQ BX, $2048
|
||||
JLS move_256through2048
|
||||
|
||||
// Check alignment
|
||||
MOVQ SI, AX
|
||||
ORQ DI, AX
|
||||
TESTL $7, AX
|
||||
JNE unaligned_fwd
|
||||
|
||||
// Aligned - do 8 bytes at a time
|
||||
MOVQ BX, CX
|
||||
SHRQ $3, CX
|
||||
ANDQ $7, BX
|
||||
REP; MOVSQ
|
||||
JMP tail
|
||||
|
||||
unaligned_fwd:
|
||||
// Unaligned - do 1 byte at a time
|
||||
MOVQ BX, CX
|
||||
REP; MOVSB
|
||||
RET
|
||||
|
||||
back:
|
||||
/*
|
||||
* check overlap
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ func BenchmarkMemmove1024(b *testing.B) { bmMemmove(b, 1024) }
|
|||
func BenchmarkMemmove2048(b *testing.B) { bmMemmove(b, 2048) }
|
||||
func BenchmarkMemmove4096(b *testing.B) { bmMemmove(b, 4096) }
|
||||
|
||||
func bmMemmoveUnaligned(b *testing.B, n int) {
|
||||
func bmMemmoveUnalignedDst(b *testing.B, n int) {
|
||||
x := make([]byte, n+1)
|
||||
y := make([]byte, n)
|
||||
b.SetBytes(int64(n))
|
||||
|
|
@ -125,31 +125,66 @@ func bmMemmoveUnaligned(b *testing.B, n int) {
|
|||
}
|
||||
}
|
||||
|
||||
// One-line benchmark wrappers that call bmMemmoveUnaligned with sizes
// 0 through 16 and then powers of two from 32 to 4096.
// NOTE(review): the helper's header appears in this diff as both
// bmMemmoveUnaligned and bmMemmoveUnalignedDst; presumably these wrappers
// belong to the pre-rename side of the change — confirm before relying on them.
func BenchmarkMemmoveUnaligned0(b *testing.B) { bmMemmoveUnaligned(b, 0) }
|
||||
func BenchmarkMemmoveUnaligned1(b *testing.B) { bmMemmoveUnaligned(b, 1) }
|
||||
func BenchmarkMemmoveUnaligned2(b *testing.B) { bmMemmoveUnaligned(b, 2) }
|
||||
func BenchmarkMemmoveUnaligned3(b *testing.B) { bmMemmoveUnaligned(b, 3) }
|
||||
func BenchmarkMemmoveUnaligned4(b *testing.B) { bmMemmoveUnaligned(b, 4) }
|
||||
func BenchmarkMemmoveUnaligned5(b *testing.B) { bmMemmoveUnaligned(b, 5) }
|
||||
func BenchmarkMemmoveUnaligned6(b *testing.B) { bmMemmoveUnaligned(b, 6) }
|
||||
func BenchmarkMemmoveUnaligned7(b *testing.B) { bmMemmoveUnaligned(b, 7) }
|
||||
func BenchmarkMemmoveUnaligned8(b *testing.B) { bmMemmoveUnaligned(b, 8) }
|
||||
func BenchmarkMemmoveUnaligned9(b *testing.B) { bmMemmoveUnaligned(b, 9) }
|
||||
func BenchmarkMemmoveUnaligned10(b *testing.B) { bmMemmoveUnaligned(b, 10) }
|
||||
func BenchmarkMemmoveUnaligned11(b *testing.B) { bmMemmoveUnaligned(b, 11) }
|
||||
func BenchmarkMemmoveUnaligned12(b *testing.B) { bmMemmoveUnaligned(b, 12) }
|
||||
func BenchmarkMemmoveUnaligned13(b *testing.B) { bmMemmoveUnaligned(b, 13) }
|
||||
func BenchmarkMemmoveUnaligned14(b *testing.B) { bmMemmoveUnaligned(b, 14) }
|
||||
func BenchmarkMemmoveUnaligned15(b *testing.B) { bmMemmoveUnaligned(b, 15) }
|
||||
func BenchmarkMemmoveUnaligned16(b *testing.B) { bmMemmoveUnaligned(b, 16) }
|
||||
func BenchmarkMemmoveUnaligned32(b *testing.B) { bmMemmoveUnaligned(b, 32) }
|
||||
func BenchmarkMemmoveUnaligned64(b *testing.B) { bmMemmoveUnaligned(b, 64) }
|
||||
func BenchmarkMemmoveUnaligned128(b *testing.B) { bmMemmoveUnaligned(b, 128) }
|
||||
func BenchmarkMemmoveUnaligned256(b *testing.B) { bmMemmoveUnaligned(b, 256) }
|
||||
func BenchmarkMemmoveUnaligned512(b *testing.B) { bmMemmoveUnaligned(b, 512) }
|
||||
func BenchmarkMemmoveUnaligned1024(b *testing.B) { bmMemmoveUnaligned(b, 1024) }
|
||||
func BenchmarkMemmoveUnaligned2048(b *testing.B) { bmMemmoveUnaligned(b, 2048) }
|
||||
func BenchmarkMemmoveUnaligned4096(b *testing.B) { bmMemmoveUnaligned(b, 4096) }
|
||||
// One-line benchmark wrappers that call bmMemmoveUnalignedDst with sizes
// 0 through 16 and then powers of two from 32 to 4096.
func BenchmarkMemmoveUnalignedDst0(b *testing.B) { bmMemmoveUnalignedDst(b, 0) }
|
||||
func BenchmarkMemmoveUnalignedDst1(b *testing.B) { bmMemmoveUnalignedDst(b, 1) }
|
||||
func BenchmarkMemmoveUnalignedDst2(b *testing.B) { bmMemmoveUnalignedDst(b, 2) }
|
||||
func BenchmarkMemmoveUnalignedDst3(b *testing.B) { bmMemmoveUnalignedDst(b, 3) }
|
||||
func BenchmarkMemmoveUnalignedDst4(b *testing.B) { bmMemmoveUnalignedDst(b, 4) }
|
||||
func BenchmarkMemmoveUnalignedDst5(b *testing.B) { bmMemmoveUnalignedDst(b, 5) }
|
||||
func BenchmarkMemmoveUnalignedDst6(b *testing.B) { bmMemmoveUnalignedDst(b, 6) }
|
||||
func BenchmarkMemmoveUnalignedDst7(b *testing.B) { bmMemmoveUnalignedDst(b, 7) }
|
||||
func BenchmarkMemmoveUnalignedDst8(b *testing.B) { bmMemmoveUnalignedDst(b, 8) }
|
||||
func BenchmarkMemmoveUnalignedDst9(b *testing.B) { bmMemmoveUnalignedDst(b, 9) }
|
||||
func BenchmarkMemmoveUnalignedDst10(b *testing.B) { bmMemmoveUnalignedDst(b, 10) }
|
||||
func BenchmarkMemmoveUnalignedDst11(b *testing.B) { bmMemmoveUnalignedDst(b, 11) }
|
||||
func BenchmarkMemmoveUnalignedDst12(b *testing.B) { bmMemmoveUnalignedDst(b, 12) }
|
||||
func BenchmarkMemmoveUnalignedDst13(b *testing.B) { bmMemmoveUnalignedDst(b, 13) }
|
||||
func BenchmarkMemmoveUnalignedDst14(b *testing.B) { bmMemmoveUnalignedDst(b, 14) }
|
||||
func BenchmarkMemmoveUnalignedDst15(b *testing.B) { bmMemmoveUnalignedDst(b, 15) }
|
||||
func BenchmarkMemmoveUnalignedDst16(b *testing.B) { bmMemmoveUnalignedDst(b, 16) }
|
||||
func BenchmarkMemmoveUnalignedDst32(b *testing.B) { bmMemmoveUnalignedDst(b, 32) }
|
||||
func BenchmarkMemmoveUnalignedDst64(b *testing.B) { bmMemmoveUnalignedDst(b, 64) }
|
||||
func BenchmarkMemmoveUnalignedDst128(b *testing.B) { bmMemmoveUnalignedDst(b, 128) }
|
||||
func BenchmarkMemmoveUnalignedDst256(b *testing.B) { bmMemmoveUnalignedDst(b, 256) }
|
||||
func BenchmarkMemmoveUnalignedDst512(b *testing.B) { bmMemmoveUnalignedDst(b, 512) }
|
||||
func BenchmarkMemmoveUnalignedDst1024(b *testing.B) { bmMemmoveUnalignedDst(b, 1024) }
|
||||
func BenchmarkMemmoveUnalignedDst2048(b *testing.B) { bmMemmoveUnalignedDst(b, 2048) }
|
||||
func BenchmarkMemmoveUnalignedDst4096(b *testing.B) { bmMemmoveUnalignedDst(b, 4096) }
|
||||
|
||||
// bmMemmoveUnalignedSrc benchmarks copying n bytes into x from y[1:], i.e.
// from a source that starts one byte past the beginning of its backing array
// while the destination starts at the beginning of its own.
func bmMemmoveUnalignedSrc(b *testing.B, n int) {
|
||||
x := make([]byte, n)
|
||||
// One extra byte so that y[1:] still has length n.
y := make([]byte, n+1)
|
||||
// Record n bytes processed per iteration.
b.SetBytes(int64(n))
|
||||
for i := 0; i < b.N; i++ {
|
||||
copy(x, y[1:])
|
||||
}
|
||||
}
|
||||
|
||||
// One-line benchmark wrappers that call bmMemmoveUnalignedSrc with sizes
// 0 through 16 and then powers of two from 32 to 4096.
func BenchmarkMemmoveUnalignedSrc0(b *testing.B) { bmMemmoveUnalignedSrc(b, 0) }
|
||||
func BenchmarkMemmoveUnalignedSrc1(b *testing.B) { bmMemmoveUnalignedSrc(b, 1) }
|
||||
func BenchmarkMemmoveUnalignedSrc2(b *testing.B) { bmMemmoveUnalignedSrc(b, 2) }
|
||||
func BenchmarkMemmoveUnalignedSrc3(b *testing.B) { bmMemmoveUnalignedSrc(b, 3) }
|
||||
func BenchmarkMemmoveUnalignedSrc4(b *testing.B) { bmMemmoveUnalignedSrc(b, 4) }
|
||||
func BenchmarkMemmoveUnalignedSrc5(b *testing.B) { bmMemmoveUnalignedSrc(b, 5) }
|
||||
func BenchmarkMemmoveUnalignedSrc6(b *testing.B) { bmMemmoveUnalignedSrc(b, 6) }
|
||||
func BenchmarkMemmoveUnalignedSrc7(b *testing.B) { bmMemmoveUnalignedSrc(b, 7) }
|
||||
func BenchmarkMemmoveUnalignedSrc8(b *testing.B) { bmMemmoveUnalignedSrc(b, 8) }
|
||||
func BenchmarkMemmoveUnalignedSrc9(b *testing.B) { bmMemmoveUnalignedSrc(b, 9) }
|
||||
func BenchmarkMemmoveUnalignedSrc10(b *testing.B) { bmMemmoveUnalignedSrc(b, 10) }
|
||||
func BenchmarkMemmoveUnalignedSrc11(b *testing.B) { bmMemmoveUnalignedSrc(b, 11) }
|
||||
func BenchmarkMemmoveUnalignedSrc12(b *testing.B) { bmMemmoveUnalignedSrc(b, 12) }
|
||||
func BenchmarkMemmoveUnalignedSrc13(b *testing.B) { bmMemmoveUnalignedSrc(b, 13) }
|
||||
func BenchmarkMemmoveUnalignedSrc14(b *testing.B) { bmMemmoveUnalignedSrc(b, 14) }
|
||||
func BenchmarkMemmoveUnalignedSrc15(b *testing.B) { bmMemmoveUnalignedSrc(b, 15) }
|
||||
func BenchmarkMemmoveUnalignedSrc16(b *testing.B) { bmMemmoveUnalignedSrc(b, 16) }
|
||||
func BenchmarkMemmoveUnalignedSrc32(b *testing.B) { bmMemmoveUnalignedSrc(b, 32) }
|
||||
func BenchmarkMemmoveUnalignedSrc64(b *testing.B) { bmMemmoveUnalignedSrc(b, 64) }
|
||||
func BenchmarkMemmoveUnalignedSrc128(b *testing.B) { bmMemmoveUnalignedSrc(b, 128) }
|
||||
func BenchmarkMemmoveUnalignedSrc256(b *testing.B) { bmMemmoveUnalignedSrc(b, 256) }
|
||||
func BenchmarkMemmoveUnalignedSrc512(b *testing.B) { bmMemmoveUnalignedSrc(b, 512) }
|
||||
func BenchmarkMemmoveUnalignedSrc1024(b *testing.B) { bmMemmoveUnalignedSrc(b, 1024) }
|
||||
func BenchmarkMemmoveUnalignedSrc2048(b *testing.B) { bmMemmoveUnalignedSrc(b, 2048) }
|
||||
func BenchmarkMemmoveUnalignedSrc4096(b *testing.B) { bmMemmoveUnalignedSrc(b, 4096) }
|
||||
|
||||
func TestMemclr(t *testing.T) {
|
||||
size := 512
|
||||
|
|
|
|||
Loading…
Reference in New Issue