crypto/internal/fips140/edwards25519/field: speed up Element.Bytes

Write bytes in 64-bit chunks made from adjacent limbs.

goos: linux
goarch: amd64
pkg: crypto/internal/fips140/edwards25519/field
cpu: Intel(R) Core(TM) i5-8350U CPU @ 1.70GHz
        │   HEAD~1    │                HEAD                 │
        │   sec/op    │   sec/op     vs base                │
Bytes-8   76.14n ± 3%   13.61n ± 3%  -82.13% (p=0.000 n=10)

        │   HEAD~1   │              HEAD              │
        │    B/op    │    B/op     vs base            │
Bytes-8   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
¹ all samples are equal

        │   HEAD~1   │              HEAD              │
        │ allocs/op  │ allocs/op   vs base            │
Bytes-8   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
¹ all samples are equal
This commit is contained in:
Alexander Yastrebov 2025-02-07 12:10:00 +01:00
parent e7d8573fbe
commit d0e1583a4f
1 changed file with 16 additions and 12 deletions

View File

@@ -233,18 +233,22 @@ func (v *Element) bytes(out *[32]byte) []byte {
t := *v
t.reduce()
var buf [8]byte
for i, l := range [5]uint64{t.l0, t.l1, t.l2, t.l3, t.l4} {
bitsOffset := i * 51
byteorder.LEPutUint64(buf[:], l<<uint(bitsOffset%8))
for i, bb := range buf {
off := bitsOffset/8 + i
if off >= len(out) {
break
}
out[off] |= bb
}
}
// Pack five 51-bit limbs into four 64-bit words:
//
//  255    204    153    102    51      0
//   ├──l4──┼──l3──┼──l2──┼──l1──┼──l0──┤
//  ├───u3───┼───u2───┼───u1───┼───u0───┤
// 256      192      128      64        0
u0 := t.l1<<51 | t.l0
u1 := t.l2<<(102-64) | t.l1>>(64-51)
u2 := t.l3<<(153-128) | t.l2>>(128-102)
u3 := t.l4<<(204-192) | t.l3>>(192-153)
byteorder.LEPutUint64(out[0*8:], u0)
byteorder.LEPutUint64(out[1*8:], u1)
byteorder.LEPutUint64(out[2*8:], u2)
byteorder.LEPutUint64(out[3*8:], u3)
return out[:]
}