hash/adler32: optimize.

The bulk of the gains come from hoisting the modulo ops outside of the inner loop. Reducing the digest type from 8 bytes to 4 bytes gains another 1% on the hash/adler32 micro-benchmark. Benchmarks for $GOOS,$GOARCH = linux,amd64 below. hash/adler32 benchmark: benchmark old ns/op new ns/op delta BenchmarkAdler32KB 1660 1364 -17.83% image/png benchmark: benchmark old ns/op new ns/op delta BenchmarkDecodeGray 2466909 2425539 -1.68% BenchmarkDecodeNRGBAGradient 9884500 9751705 -1.34% BenchmarkDecodeNRGBAOpaque 8511615 8379800 -1.55% BenchmarkDecodePaletted 1366683 1330677 -2.63% BenchmarkDecodeRGB 6987496 6884974 -1.47% BenchmarkEncodePaletted 6292408 6040052 -4.01% BenchmarkEncodeRGBOpaque 19780680 19178440 -3.04% BenchmarkEncodeRGBA 80738600 79076800 -2.06% Wall time for Denis Cheremisov's PNG-decoding program given in https://groups.google.com/group/golang-nuts/browse_thread/thread/22aa8a05040fdd49 Before: 2.44s After: 2.26s Delta: -7% R=rsc CC=golang-dev https://golang.org/cl/6251044
2012-05-25 09:58:38 +10:00 · 2012-05-25 09:58:38 +10:00 · 60ffae25bc
parent 184209787c
commit 60ffae25bc
2 changed files with 76 additions and 55 deletions
--- a/src/pkg/hash/adler32/adler32.go
+++ b/src/pkg/hash/adler32/adler32.go
@ -3,7 +3,8 @@
 // license that can be found in the LICENSE file.

 // Package adler32 implements the Adler-32 checksum.
-// Defined in RFC 1950:
+//
+// It is defined in RFC 1950:
 //	Adler-32 is composed of two sums accumulated per byte: s1 is
 //	the sum of all bytes, s2 is the sum of all s1 values. Both sums
 //	are done modulo 65521. s1 is initialized to 1, s2 to zero.  The
@ -14,20 +15,22 @@ package adler32
 import "hash"

 const (
+	// mod is the largest prime that is less than 65536.
 	mod = 65521
+	// nmax is the largest n such that
+	// 255 * n * (n+1) / 2 + (n+1) * (mod-1) <= 2^32-1.
+	// It is mentioned in RFC 1950 (search for "5552").
+	nmax = 5552
 )

 // The size of an Adler-32 checksum in bytes.
 const Size = 4

 // digest represents the partial evaluation of a checksum.
-type digest struct {
-	// invariant: (a < mod && b < mod) || a <= b
-	// invariant: a + b + 255 <= 0xffffffff
-	a, b uint32
-}
+// The low 16 bits are s1, the high 16 bits are s2.
+type digest uint32

-func (d *digest) Reset() { d.a, d.b = 1, 0 }
+func (d *digest) Reset() { *d = 1 }

 // New returns a new hash.Hash32 computing the Adler-32 checksum.
 func New() hash.Hash32 {
@ -40,43 +43,36 @@ func (d *digest) Size() int { return Size }

 func (d *digest) BlockSize() int { return 1 }

-// Add p to the running checksum a, b.
-func update(a, b uint32, p []byte) (aa, bb uint32) {
-	for _, pi := range p {
-		a += uint32(pi)
-		b += a
-		// invariant: a <= b
-		if b > (0xffffffff-255)/2 {
-			a %= mod
-			b %= mod
-			// invariant: a < mod && b < mod
-		} else {
-			// invariant: a + b + 255 <= 2 * b + 255 <= 0xffffffff
+// Add p to the running checksum d.
+func update(d digest, p []byte) digest {
+	s1, s2 := uint32(d&0xffff), uint32(d>>16)
+	for len(p) > 0 {
+		var q []byte
+		if len(p) > nmax {
+			p, q = p[:nmax], p[nmax:]
 		}
+		for _, x := range p {
+			s1 += uint32(x)
+			s2 += s1
+		}
+		s1 %= mod
+		s2 %= mod
+		p = q
 	}
-	return a, b
-}
-
-// Return the 32-bit checksum corresponding to a, b.
-func finish(a, b uint32) uint32 {
-	if b >= mod {
-		a %= mod
-		b %= mod
-	}
-	return b<<16 | a
+	return digest(s2<<16 | s1)
 }

 func (d *digest) Write(p []byte) (nn int, err error) {
-	d.a, d.b = update(d.a, d.b, p)
+	*d = update(*d, p)
 	return len(p), nil
 }

-func (d *digest) Sum32() uint32 { return finish(d.a, d.b) }
+func (d *digest) Sum32() uint32 { return uint32(*d) }

 func (d *digest) Sum(in []byte) []byte {
-	s := d.Sum32()
+	s := uint32(*d)
 	return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
 }

 // Checksum returns the Adler-32 checksum of data.
-func Checksum(data []byte) uint32 { return finish(update(1, 0, data)) }
+func Checksum(data []byte) uint32 { return uint32(update(1, data)) }
--- a/src/pkg/hash/adler32/adler32_test.go
+++ b/src/pkg/hash/adler32/adler32_test.go
@ -5,25 +5,23 @@
 package adler32

 import (
-	"io"
+	"strings"
 	"testing"
 )

-type _Adler32Test struct {
+var golden = []struct {
 	out uint32
 	in  string
-}
-
-var golden = []_Adler32Test{
-	{0x1, ""},
-	{0x620062, "a"},
-	{0x12600c4, "ab"},
-	{0x24d0127, "abc"},
-	{0x3d8018b, "abcd"},
-	{0x5c801f0, "abcde"},
-	{0x81e0256, "abcdef"},
-	{0xadb02bd, "abcdefg"},
-	{0xe000325, "abcdefgh"},
+}{
+	{0x00000001, ""},
+	{0x00620062, "a"},
+	{0x012600c4, "ab"},
+	{0x024d0127, "abc"},
+	{0x03d8018b, "abcd"},
+	{0x05c801f0, "abcde"},
+	{0x081e0256, "abcdef"},
+	{0x0adb02bd, "abcdefg"},
+	{0x0e000325, "abcdefgh"},
 	{0x118e038e, "abcdefghi"},
 	{0x158603f8, "abcdefghij"},
 	{0x3f090f02, "Discard medicine more than two years old."},
@ -47,17 +45,44 @@ var golden = []_Adler32Test{
 	{0x91dd304f, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction.  Lewis-Randall Rule"},
 	{0x2e5d1316, "How can you write a big system without C++?  -Paul Glick"},
 	{0xd0201df6, "'Invariant assertions' is the most elegant programming technique!  -Tom Szymanski"},
+	{0x211297c8, strings.Repeat("\xff", 5548) + "8"},
+	{0xbaa198c8, strings.Repeat("\xff", 5549) + "9"},
+	{0x553499be, strings.Repeat("\xff", 5550) + "0"},
+	{0xf0c19abe, strings.Repeat("\xff", 5551) + "1"},
+	{0x8d5c9bbe, strings.Repeat("\xff", 5552) + "2"},
+	{0x2af69cbe, strings.Repeat("\xff", 5553) + "3"},
+	{0xc9809dbe, strings.Repeat("\xff", 5554) + "4"},
+	{0x69189ebe, strings.Repeat("\xff", 5555) + "5"},
+	{0x86af0001, strings.Repeat("\x00", 1e5)},
+	{0x79660b4d, strings.Repeat("a", 1e5)},
+	{0x110588ee, strings.Repeat("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 1e4)},
+}
+
+// checksum is a slow but simple implementation of the Adler-32 checksum.
+// It is a straight port of the sample code in RFC 1950 section 9.
+func checksum(p []byte) uint32 {
+	s1, s2 := uint32(1), uint32(0)
+	for _, x := range p {
+		s1 = (s1 + uint32(x)) % mod
+		s2 = (s2 + s1) % mod
+	}
+	return s2<<16 | s1
 }

 func TestGolden(t *testing.T) {
-	for i := 0; i < len(golden); i++ {
-		g := golden[i]
-		c := New()
-		io.WriteString(c, g.in)
-		s := c.Sum32()
-		if s != g.out {
-			t.Errorf("adler32(%s) = 0x%x want 0x%x", g.in, s, g.out)
-			t.FailNow()
+	for _, g := range golden {
+		in := g.in
+		if len(in) > 220 {
+			in = in[:100] + "..." + in[len(in)-100:]
+		}
+		p := []byte(g.in)
+		if got := checksum(p); got != g.out {
+			t.Errorf("simple implementation: checksum(%q) = 0x%x want 0x%x", in, got, g.out)
+			continue
+		}
+		if got := Checksum(p); got != g.out {
+			t.Errorf("optimized implementation: Checksum(%q) = 0x%x want 0x%x", in, got, g.out)
+			continue
 		}
 	}
 }