mirror of https://github.com/golang/go.git
crypto/rsa,crypto/internal/bigmod: improve verify/encrypt performance
Most libraries don't consider N secret, but it's arguably useful for
privacy applications. However, E should generally be fixed, and there is
a lot of performance to be gained by using variable-time exponentiation.
The threshold trick is from BoringSSL.
goos: linux
goarch: amd64
pkg: crypto/rsa
cpu: Intel(R) Core(TM) i5-7400 CPU @ 3.00GHz
│ old │ new │
│ sec/op │ sec/op vs base │
DecryptPKCS1v15/2048-4 1.398m ± 0% 1.396m ± 4% ~ (p=0.853 n=10)
DecryptPKCS1v15/3072-4 3.640m ± 0% 3.652m ± 1% ~ (p=0.063 n=10)
DecryptPKCS1v15/4096-4 7.756m ± 0% 7.764m ± 0% ~ (p=0.853 n=10)
EncryptPKCS1v15/2048-4 175.50µ ± 0% 39.37µ ± 0% -77.57% (p=0.000 n=10)
DecryptOAEP/2048-4 1.375m ± 0% 1.371m ± 1% ~ (p=0.089 n=10)
EncryptOAEP/2048-4 177.64µ ± 0% 41.17µ ± 1% -76.82% (p=0.000 n=10)
SignPKCS1v15/2048-4 1.419m ± 0% 1.393m ± 1% -1.84% (p=0.000 n=10)
VerifyPKCS1v15/2048-4 173.70µ ± 1% 38.28µ ± 2% -77.96% (p=0.000 n=10)
SignPSS/2048-4 1.437m ± 1% 1.413m ± 0% -1.64% (p=0.000 n=10)
VerifyPSS/2048-4 176.83µ ± 1% 43.08µ ± 5% -75.64% (p=0.000 n=10)
This finally makes everything in crypto/rsa faster than it was in Go 1.19.
goos: linux
goarch: amd64
pkg: crypto/rsa
cpu: Intel(R) Core(TM) i5-7400 CPU @ 3.00GHz
│ go1.19.txt │ go1.20.txt │ go1.21.txt │ new.txt │
│ sec/op │ sec/op vs base │ sec/op vs base │ sec/op vs base │
DecryptPKCS1v15/2048-4 1.458m ± 0% 1.597m ± 1% +9.50% (p=0.000 n=10) 1.395m ± 1% -4.30% (p=0.000 n=10) 1.396m ± 4% -4.25% (p=0.002 n=10)
DecryptPKCS1v15/3072-4 4.023m ± 1% 5.332m ± 1% +32.53% (p=0.000 n=10) 3.649m ± 1% -9.30% (p=0.000 n=10) 3.652m ± 1% -9.23% (p=0.000 n=10)
DecryptPKCS1v15/4096-4 8.710m ± 1% 11.937m ± 1% +37.05% (p=0.000 n=10) 7.564m ± 1% -13.16% (p=0.000 n=10) 7.764m ± 0% -10.86% (p=0.000 n=10)
EncryptPKCS1v15/2048-4 51.79µ ± 0% 267.68µ ± 0% +416.90% (p=0.000 n=10) 176.42µ ± 0% +240.67% (p=0.000 n=10) 39.37µ ± 0% -23.98% (p=0.000 n=10)
DecryptOAEP/2048-4 1.461m ± 0% 1.613m ± 1% +10.37% (p=0.000 n=10) 1.415m ± 0% -3.13% (p=0.000 n=10) 1.371m ± 1% -6.18% (p=0.000 n=10)
EncryptOAEP/2048-4 54.24µ ± 0% 269.19µ ± 0% +396.28% (p=0.000 n=10) 177.31µ ± 0% +226.89% (p=0.000 n=10) 41.17µ ± 1% -24.10% (p=0.000 n=10)
SignPKCS1v15/2048-4 1.510m ± 0% 1.705m ± 0% +12.93% (p=0.000 n=10) 1.423m ± 1% -5.78% (p=0.000 n=10) 1.393m ± 1% -7.76% (p=0.000 n=10)
VerifyPKCS1v15/2048-4 50.87µ ± 0% 266.41µ ± 1% +423.71% (p=0.000 n=10) 174.38µ ± 0% +242.79% (p=0.000 n=10) 38.28µ ± 2% -24.75% (p=0.000 n=10)
SignPSS/2048-4 1.513m ± 1% 1.709m ± 0% +12.97% (p=0.000 n=10) 1.461m ± 0% -3.42% (p=0.000 n=10) 1.413m ± 0% -6.58% (p=0.000 n=10)
VerifyPSS/2048-4 53.45µ ± 1% 268.56µ ± 0% +402.48% (p=0.000 n=10) 177.29µ ± 0% +231.72% (p=0.000 n=10) 43.08µ ± 5% -19.39% (p=0.000 n=10)
geomean 514.6µ 1.094m +112.65% 801.6µ +55.77% 442.1µ -14.08%
Fixes #63516
Change-Id: If40e596a2e4b3ab7a202ff34591cf9cffecfcc1b
Reviewed-on: https://go-review.googlesource.com/c/go/+/552935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
This commit is contained in:
parent
eb30ac3472
commit
b2dbfbfc23
|
|
@ -318,14 +318,48 @@ type Modulus struct {
|
|||
// rr returns R*R with R = 2^(_W * n) and n = len(m.nat.limbs).
|
||||
func rr(m *Modulus) *Nat {
|
||||
rr := NewNat().ExpandFor(m)
|
||||
// R*R is 2^(2 * _W * n). We can safely get 2^(_W * (n - 1)) by setting the
|
||||
// most significant limb to 1. We then get to R*R by shifting left by _W
|
||||
// n + 1 times.
|
||||
n := len(rr.limbs)
|
||||
rr.limbs[n-1] = 1
|
||||
for i := n - 1; i < 2*n; i++ {
|
||||
rr.shiftIn(0, m) // x = x * 2^_W mod m
|
||||
n := uint(len(rr.limbs))
|
||||
mLen := uint(m.BitLen())
|
||||
logR := _W * n
|
||||
|
||||
// We start by computing R = 2^(_W * n) mod m. We can get pretty close, to
|
||||
// 2^⌊log₂m⌋, by setting the highest bit we can without having to reduce.
|
||||
rr.limbs[n-1] = 1 << ((mLen - 1) % _W)
|
||||
// Then we double until we reach 2^(_W * n).
|
||||
for i := mLen - 1; i < logR; i++ {
|
||||
rr.Add(rr, m)
|
||||
}
|
||||
|
||||
// Next we need to get from R to 2^(_W * n) R mod m (aka from one to R in
|
||||
// the Montgomery domain, meaning we can use Montgomery multiplication now).
|
||||
// We could do that by doubling _W * n times, or with a square-and-double
|
||||
// chain log2(_W * n) long. Turns out the fastest thing is to start out with
|
||||
// doublings, and switch to square-and-double once the exponent is large
|
||||
// enough to justify the cost of the multiplications.
|
||||
|
||||
// The threshold is selected experimentally as a linear function of n.
|
||||
threshold := n / 4
|
||||
|
||||
// We calculate how many of the most-significant bits of the exponent we can
|
||||
// compute before crossing the threshold, and we do it with doublings.
|
||||
i := bits.UintSize
|
||||
for logR>>i <= threshold {
|
||||
i--
|
||||
}
|
||||
for k := uint(0); k < logR>>i; k++ {
|
||||
rr.Add(rr, m)
|
||||
}
|
||||
|
||||
// Then we process the remaining bits of the exponent with a
|
||||
// square-and-double chain.
|
||||
for i > 0 {
|
||||
rr.montgomeryMul(rr, rr, m)
|
||||
i--
|
||||
if logR>>i&1 != 0 {
|
||||
rr.Add(rr, m)
|
||||
}
|
||||
}
|
||||
|
||||
return rr
|
||||
}
|
||||
|
||||
|
|
@ -745,26 +779,21 @@ func (out *Nat) Exp(x *Nat, e []byte, m *Modulus) *Nat {
|
|||
return out.montgomeryReduction(m)
|
||||
}
|
||||
|
||||
// ExpShort calculates out = x^e mod m.
|
||||
// ExpShortVarTime calculates out = x^e mod m.
|
||||
//
|
||||
// The output will be resized to the size of m and overwritten. x must already
|
||||
// be reduced modulo m. This leaks the exact bit size of the exponent.
|
||||
func (out *Nat) ExpShort(x *Nat, e uint, m *Modulus) *Nat {
|
||||
xR := NewNat().set(x).montgomeryRepresentation(m)
|
||||
|
||||
out.resetFor(m)
|
||||
out.limbs[0] = 1
|
||||
out.montgomeryRepresentation(m)
|
||||
|
||||
// be reduced modulo m. This leaks the exponent through timing side-channels.
|
||||
func (out *Nat) ExpShortVarTime(x *Nat, e uint, m *Modulus) *Nat {
|
||||
// For short exponents, precomputing a table and using a window like in Exp
|
||||
// doesn't pay off. Instead, we do a simple constant-time conditional
|
||||
// square-and-multiply chain, skipping the initial run of zeroes.
|
||||
tmp := NewNat().ExpandFor(m)
|
||||
for i := bits.UintSize - bitLen(e); i < bits.UintSize; i++ {
|
||||
// doesn't pay off. Instead, we do a simple conditional square-and-multiply
|
||||
// chain, skipping the initial run of zeroes.
|
||||
xR := NewNat().set(x).montgomeryRepresentation(m)
|
||||
out.set(xR)
|
||||
for i := bits.UintSize - bitLen(e) + 1; i < bits.UintSize; i++ {
|
||||
out.montgomeryMul(out, out, m)
|
||||
k := (e >> (bits.UintSize - i - 1)) & 1
|
||||
tmp.montgomeryMul(out, xR, m)
|
||||
out.assign(ctEq(k, 1), tmp)
|
||||
if k := (e >> (bits.UintSize - i - 1)) & 1; k != 0 {
|
||||
out.montgomeryMul(out, xR, m)
|
||||
}
|
||||
}
|
||||
return out.montgomeryReduction(m)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -303,7 +303,7 @@ func TestExpShort(t *testing.T) {
|
|||
m := modulusFromBytes([]byte{13})
|
||||
x := &Nat{[]uint{3}}
|
||||
out := &Nat{[]uint{0}}
|
||||
out.ExpShort(x, 12, m)
|
||||
out.ExpShortVarTime(x, 12, m)
|
||||
expected := &Nat{[]uint{1}}
|
||||
if out.Equal(expected) != 1 {
|
||||
t.Errorf("%+v != %+v", out, expected)
|
||||
|
|
|
|||
|
|
@ -43,6 +43,10 @@ import (
|
|||
var bigOne = big.NewInt(1)
|
||||
|
||||
// A PublicKey represents the public part of an RSA key.
|
||||
//
|
||||
// The value of the modulus N is considered secret by this library and protected
|
||||
// from leaking through timing side-channels. However, neither the value of the
|
||||
// exponent E nor the precise bit size of N are similarly protected.
|
||||
type PublicKey struct {
|
||||
N *big.Int // modulus
|
||||
E int // public exponent
|
||||
|
|
@ -478,10 +482,6 @@ var ErrMessageTooLong = errors.New("crypto/rsa: message too long for RSA key siz
|
|||
func encrypt(pub *PublicKey, plaintext []byte) ([]byte, error) {
|
||||
boring.Unreachable()
|
||||
|
||||
// Most of the CPU time for encryption and verification is spent in this
|
||||
// NewModulusFromBig call, because PublicKey doesn't have a Precomputed
|
||||
// field. If performance becomes an issue, consider placing a private
|
||||
// sync.Once on PublicKey to compute this.
|
||||
N, err := bigmod.NewModulusFromBig(pub.N)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -492,7 +492,7 @@ func encrypt(pub *PublicKey, plaintext []byte) ([]byte, error) {
|
|||
}
|
||||
e := uint(pub.E)
|
||||
|
||||
return bigmod.NewNat().ExpShort(m, e, N).Bytes(N), nil
|
||||
return bigmod.NewNat().ExpShortVarTime(m, e, N).Bytes(N), nil
|
||||
}
|
||||
|
||||
// EncryptOAEP encrypts the given message with RSA-OAEP.
|
||||
|
|
@ -686,7 +686,7 @@ func decrypt(priv *PrivateKey, ciphertext []byte, check bool) ([]byte, error) {
|
|||
}
|
||||
|
||||
if check {
|
||||
c1 := bigmod.NewNat().ExpShort(m, uint(priv.E), N)
|
||||
c1 := bigmod.NewNat().ExpShortVarTime(m, uint(priv.E), N)
|
||||
if c1.Equal(c) != 1 {
|
||||
return nil, ErrDecryption
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue