diff --git a/src/runtime/memclr_riscv64.s b/src/runtime/memclr_riscv64.s
index d12b545b1e..1c1e6ab54d 100644
--- a/src/runtime/memclr_riscv64.s
+++ b/src/runtime/memclr_riscv64.s
@@ -16,10 +16,11 @@ TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16
 	BLT	X11, X9, check4
 
 	// Check alignment
-	AND	$3, X10, X5
+	AND	$7, X10, X5
 	BEQZ	X5, aligned
 
 	// Zero one byte at a time until we reach 8 byte alignment.
+	SUB	X5, X9, X5
 	SUB	X5, X11, X11
 align:
	ADD	$-1, X5
@@ -28,7 +29,7 @@ align:
 	BNEZ	X5, align
 
 aligned:
-	MOV	$8, X9
+	// X9 already contains $8
 	BLT	X11, X9, check4
 	MOV	$16, X9
 	BLT	X11, X9, zero8
diff --git a/src/runtime/memmove_test.go b/src/runtime/memmove_test.go
index 73895becd8..f0c9a82bb6 100644
--- a/src/runtime/memmove_test.go
+++ b/src/runtime/memmove_test.go
@@ -400,6 +400,32 @@ func BenchmarkMemclr(b *testing.B) {
 	}
 }
 
+func BenchmarkMemclrUnaligned(b *testing.B) {
+	for _, off := range []int{0, 1, 4, 7} {
+		for _, n := range []int{5, 16, 64, 256, 4096, 65536} {
+			x := make([]byte, n+off)
+			b.Run(fmt.Sprint(off, n), func(b *testing.B) {
+				b.SetBytes(int64(n))
+				for i := 0; i < b.N; i++ {
+					MemclrBytes(x[off:])
+				}
+			})
+		}
+	}
+
+	for _, off := range []int{0, 1, 4, 7} {
+		for _, m := range []int{1, 4, 8, 16, 64} {
+			x := make([]byte, (m<<20)+off)
+			b.Run(fmt.Sprint(off, m, "M"), func(b *testing.B) {
+				b.SetBytes(int64(m << 20))
+				for i := 0; i < b.N; i++ {
+					MemclrBytes(x[off:])
+				}
+			})
+		}
+	}
+}
+
 func BenchmarkGoMemclr(b *testing.B) {
 	benchmarkSizes(b, []int{5, 16, 64, 256}, func(b *testing.B, n int) {
 		x := make([]byte, n)
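The first hunk changes the head-alignment handling: AND $7 (rather than $3) extracts the pointer's offset within an 8-byte word, and the new SUB X5, X9, X5 converts that offset into the number of leading bytes that must be zeroed to reach alignment (X9 already holds 8 from the earlier length check, which is also why the second hunk can drop the reload at aligned:). Below is a minimal Go sketch of that arithmetic; the function name alignHead and the example address and length are illustrative only, not code from the runtime or from this patch.

package main

import "fmt"

// alignHead mirrors the head-alignment arithmetic in the patched assembly.
// Illustrative sketch only, not the runtime's implementation.
func alignHead(ptr, n uintptr) (head, rest uintptr) {
	mis := ptr & 7 // AND $7, X10, X5: offset within an 8-byte word
	if mis == 0 || n < 8 {
		return 0, n // already aligned, or too short for word stores
	}
	head = 8 - mis  // SUB X5, X9, X5 (X9 holds 8): bytes to zero one at a time
	rest = n - head // SUB X5, X11, X11: length left for the aligned loops
	return head, rest
}

func main() {
	// A buffer starting at ...0x1005 needs 3 single-byte stores before
	// 8-byte stores can take over; the pre-fix arithmetic (mask $3, then
	// zero that many bytes) would have left the pointer unaligned here.
	head, rest := alignHead(0x1005, 64)
	fmt.Println(head, rest) // 3 61
}

The new BenchmarkMemclrUnaligned exercises exactly these cases, clearing slices that start 0, 1, 4 and 7 bytes into an allocation, across both small sizes and multi-megabyte buffers.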