diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index 05c0090b61..f0733edd3f 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -652,6 +652,38 @@ func bmEqual(equal func([]byte, []byte) bool) func(b *testing.B, n int) {
 	}
 }
 
+func BenchmarkEqualBothUnaligned(b *testing.B) {
+	sizes := []int{64, 4 << 10}
+	if !isRaceBuilder {
+		sizes = append(sizes, []int{4 << 20, 64 << 20}...)
+	}
+	maxSize := 2 * (sizes[len(sizes)-1] + 8)
+	if len(bmbuf) < maxSize {
+		bmbuf = make([]byte, maxSize)
+	}
+
+	for _, n := range sizes {
+		for _, off := range []int{0, 1, 4, 7} {
+			buf1 := bmbuf[off : off+n]
+			buf2Start := (len(bmbuf) / 2) + off
+			buf2 := bmbuf[buf2Start : buf2Start+n]
+			buf1[n-1] = 'x'
+			buf2[n-1] = 'x'
+			b.Run(fmt.Sprint(n, off), func(b *testing.B) {
+				b.SetBytes(int64(n))
+				for i := 0; i < b.N; i++ {
+					eq := Equal(buf1, buf2)
+					if !eq {
+						b.Fatal("bad equal")
+					}
+				}
+			})
+			buf1[n-1] = '\x00'
+			buf2[n-1] = '\x00'
+		}
+	}
+}
+
 func BenchmarkIndex(b *testing.B) {
 	benchBytes(b, indexSizes, func(b *testing.B, n int) {
 		buf := bmbuf[0:n]
diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s
index 3834083ec9..503aac5751 100644
--- a/src/internal/bytealg/equal_riscv64.s
+++ b/src/internal/bytealg/equal_riscv64.s
@@ -37,6 +37,8 @@ TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0
 	BEQZ	X9, loop32_check
 
 	// Check one byte at a time until we reach 8 byte alignment.
+	SUB	X9, X0, X9
+	ADD	$8, X9, X9
 	SUB	X9, X12, X12
 align:
 	ADD	$-1, X9