mirror of https://github.com/golang/go.git
hash/crc32: cleanup code and improve tests
Major reorganization of the crc32 code: - The arch-specific files now implement a well-defined interface (documented in crc32.go). They no longer have the responsibility of initializing and falling back to a non-accelerated implementation; instead, that happens in the higher level code. - The non-accelerated algorithms are moved to a separate file with no dependencies on other code. - The "cutoff" optimization for slicing-by-8 is moved inside the algorithm itself (as opposed to every callsite). Tests are significantly improved: - direct tests for the non-accelerated algorithms. - "cross-check" tests for arch-specific implementations (all archs). - tests for misaligned buffers for both IEEE and Castagnoli. Fixes #16909. Change-Id: I9b6dd83b7a57cd615eae901c0a6d61c6b8091c74 Reviewed-on: https://go-review.googlesource.com/27935 Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
2a2cab2911
commit
bdde10137b
|
|
@ -20,9 +20,6 @@ import (
|
|||
// The size of a CRC-32 checksum in bytes.
|
||||
const Size = 4
|
||||
|
||||
// Use "slice by 8" when payload >= this value.
|
||||
const sliceBy8Cutoff = 16
|
||||
|
||||
// Predefined polynomials.
|
||||
const (
|
||||
// IEEE is by far and away the most common CRC-32 polynomial.
|
||||
|
|
@ -43,80 +40,96 @@ const (
|
|||
// Table is a 256-word table representing the polynomial for efficient processing.
|
||||
type Table [256]uint32
|
||||
|
||||
// This file makes use of functions implemented in architecture-specific files.
|
||||
// The interface that they implement is as follows:
|
||||
//
|
||||
// // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE
|
||||
// // algorithm is available.
|
||||
// archAvailableIEEE() bool
|
||||
//
|
||||
// // archInitIEEE initializes the architecture-specific CRC3-IEEE algorithm.
|
||||
// // It can only be called if archAvailableIEEE() returns true.
|
||||
// archInitIEEE()
|
||||
//
|
||||
// // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if
|
||||
// // archInitIEEE() was previously called.
|
||||
// archUpdateIEEE(crc uint32, p []byte) uint32
|
||||
//
|
||||
// // archAvailableCastagnoli reports whether an architecture-specific
|
||||
// // CRC32-C algorithm is available.
|
||||
// archAvailableCastagnoli() bool
|
||||
//
|
||||
// // archInitCastagnoli initializes the architecture-specific CRC32-C
|
||||
// // algorithm. It can only be called if archAvailableCastagnoli() returns
|
||||
// // true.
|
||||
// archInitCastagnoli()
|
||||
//
|
||||
// // archUpdateCastagnoli updates the given CRC32-C. It can only be called
|
||||
// // if archInitCastagnoli() was previously called.
|
||||
// archUpdateCastagnoli(crc uint32, p []byte) uint32
|
||||
|
||||
// castagnoliTable points to a lazily initialized Table for the Castagnoli
|
||||
// polynomial. MakeTable will always return this value when asked to make a
|
||||
// Castagnoli table so we can compare against it to find when the caller is
|
||||
// using this polynomial.
|
||||
var castagnoliTable *Table
|
||||
var castagnoliTable8 *slicing8Table
|
||||
var castagnoliArchImpl bool
|
||||
var updateCastagnoli func(crc uint32, p []byte) uint32
|
||||
var castagnoliOnce sync.Once
|
||||
|
||||
func castagnoliInit() {
|
||||
// Call the arch-specific init function and let it decide if we will need
|
||||
// the tables for the generic implementation.
|
||||
needGenericTables := castagnoliInitArch()
|
||||
castagnoliTable = simpleMakeTable(Castagnoli)
|
||||
castagnoliArchImpl = archAvailableCastagnoli()
|
||||
|
||||
if needGenericTables {
|
||||
castagnoliTable8 = makeTable8(Castagnoli)
|
||||
if castagnoliArchImpl {
|
||||
archInitCastagnoli()
|
||||
updateCastagnoli = archUpdateCastagnoli
|
||||
} else {
|
||||
// Initialize the slicing-by-8 table.
|
||||
castagnoliTable8 = slicingMakeTable(Castagnoli)
|
||||
updateCastagnoli = func(crc uint32, p []byte) uint32 {
|
||||
return slicingUpdate(crc, castagnoliTable8, p)
|
||||
}
|
||||
}
|
||||
|
||||
// Even if we don't need the contents of this table, we use it as a handle
|
||||
// returned by MakeTable. We should find a way to clean this up (see #16909).
|
||||
castagnoliTable = makeTable(Castagnoli)
|
||||
}
|
||||
|
||||
// IEEETable is the table for the IEEE polynomial.
|
||||
var IEEETable = makeTable(IEEE)
|
||||
|
||||
// slicing8Table is array of 8 Tables
|
||||
type slicing8Table [8]Table
|
||||
var IEEETable = simpleMakeTable(IEEE)
|
||||
|
||||
// ieeeTable8 is the slicing8Table for IEEE
|
||||
var ieeeTable8 *slicing8Table
|
||||
var ieeeTable8Once sync.Once
|
||||
var ieeeArchImpl bool
|
||||
var updateIEEE func(crc uint32, p []byte) uint32
|
||||
var ieeeOnce sync.Once
|
||||
|
||||
func ieeeInit() {
|
||||
ieeeArchImpl = archAvailableIEEE()
|
||||
|
||||
if ieeeArchImpl {
|
||||
archInitIEEE()
|
||||
updateIEEE = archUpdateIEEE
|
||||
} else {
|
||||
// Initialize the slicing-by-8 table.
|
||||
ieeeTable8 = slicingMakeTable(IEEE)
|
||||
updateIEEE = func(crc uint32, p []byte) uint32 {
|
||||
return slicingUpdate(crc, ieeeTable8, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MakeTable returns a Table constructed from the specified polynomial.
|
||||
// The contents of this Table must not be modified.
|
||||
func MakeTable(poly uint32) *Table {
|
||||
switch poly {
|
||||
case IEEE:
|
||||
ieeeOnce.Do(ieeeInit)
|
||||
return IEEETable
|
||||
case Castagnoli:
|
||||
castagnoliOnce.Do(castagnoliInit)
|
||||
return castagnoliTable
|
||||
}
|
||||
return makeTable(poly)
|
||||
}
|
||||
|
||||
// makeTable returns the Table constructed from the specified polynomial.
|
||||
func makeTable(poly uint32) *Table {
|
||||
t := new(Table)
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := uint32(i)
|
||||
for j := 0; j < 8; j++ {
|
||||
if crc&1 == 1 {
|
||||
crc = (crc >> 1) ^ poly
|
||||
} else {
|
||||
crc >>= 1
|
||||
}
|
||||
}
|
||||
t[i] = crc
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// makeTable8 returns slicing8Table constructed from the specified polynomial.
|
||||
func makeTable8(poly uint32) *slicing8Table {
|
||||
t := new(slicing8Table)
|
||||
t[0] = *makeTable(poly)
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := t[0][i]
|
||||
for j := 1; j < 8; j++ {
|
||||
crc = t[0][crc&0xFF] ^ (crc >> 8)
|
||||
t[j][i] = crc
|
||||
}
|
||||
}
|
||||
return t
|
||||
return simpleMakeTable(poly)
|
||||
}
|
||||
|
||||
// digest represents the partial evaluation of a checksum.
|
||||
|
|
@ -128,7 +141,12 @@ type digest struct {
|
|||
// New creates a new hash.Hash32 computing the CRC-32 checksum
|
||||
// using the polynomial represented by the Table.
|
||||
// Its Sum method will lay the value out in big-endian byte order.
|
||||
func New(tab *Table) hash.Hash32 { return &digest{0, tab} }
|
||||
func New(tab *Table) hash.Hash32 {
|
||||
if tab == IEEETable {
|
||||
ieeeOnce.Do(ieeeInit)
|
||||
}
|
||||
return &digest{0, tab}
|
||||
}
|
||||
|
||||
// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
|
||||
// using the IEEE polynomial.
|
||||
|
|
@ -141,44 +159,32 @@ func (d *digest) BlockSize() int { return 1 }
|
|||
|
||||
func (d *digest) Reset() { d.crc = 0 }
|
||||
|
||||
func update(crc uint32, tab *Table, p []byte) uint32 {
|
||||
crc = ^crc
|
||||
for _, v := range p {
|
||||
crc = tab[byte(crc)^v] ^ (crc >> 8)
|
||||
}
|
||||
return ^crc
|
||||
}
|
||||
|
||||
// updateSlicingBy8 updates CRC using Slicing-by-8
|
||||
func updateSlicingBy8(crc uint32, tab *slicing8Table, p []byte) uint32 {
|
||||
crc = ^crc
|
||||
for len(p) > 8 {
|
||||
crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
|
||||
crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
|
||||
tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
|
||||
tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
|
||||
p = p[8:]
|
||||
}
|
||||
crc = ^crc
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
return update(crc, &tab[0], p)
|
||||
}
|
||||
|
||||
// Update returns the result of adding the bytes in p to the crc.
|
||||
func Update(crc uint32, tab *Table, p []byte) uint32 {
|
||||
switch tab {
|
||||
case castagnoliTable:
|
||||
return updateCastagnoli(crc, p)
|
||||
case IEEETable:
|
||||
// Unfortunately, because IEEETable is exported, IEEE may be used without a
|
||||
// call to MakeTable. We have to make sure it gets initialized in that case.
|
||||
ieeeOnce.Do(ieeeInit)
|
||||
return updateIEEE(crc, p)
|
||||
default:
|
||||
return simpleUpdate(crc, tab, p)
|
||||
}
|
||||
return update(crc, tab, p)
|
||||
}
|
||||
|
||||
func (d *digest) Write(p []byte) (n int, err error) {
|
||||
d.crc = Update(d.crc, d.tab, p)
|
||||
switch d.tab {
|
||||
case castagnoliTable:
|
||||
d.crc = updateCastagnoli(d.crc, p)
|
||||
case IEEETable:
|
||||
// We only create digest objects through New() which takes care of
|
||||
// initialization in this case.
|
||||
d.crc = updateIEEE(d.crc, p)
|
||||
default:
|
||||
d.crc = simpleUpdate(d.crc, d.tab, p)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
|
|
@ -195,4 +201,7 @@ func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
|
|||
|
||||
// ChecksumIEEE returns the CRC-32 checksum of data
|
||||
// using the IEEE polynomial.
|
||||
func ChecksumIEEE(data []byte) uint32 { return updateIEEE(0, data) }
|
||||
func ChecksumIEEE(data []byte) uint32 {
|
||||
ieeeOnce.Do(ieeeInit)
|
||||
return updateIEEE(0, data)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a
|
||||
// description of the interface that each architecture-specific file
|
||||
// implements.
|
||||
|
||||
package crc32
|
||||
|
||||
import "unsafe"
|
||||
|
|
@ -45,9 +49,13 @@ type sse42Table [4]Table
|
|||
var castagnoliSSE42TableK1 *sse42Table
|
||||
var castagnoliSSE42TableK2 *sse42Table
|
||||
|
||||
func castagnoliInitArch() (needGenericTables bool) {
|
||||
func archAvailableCastagnoli() bool {
|
||||
return sse42
|
||||
}
|
||||
|
||||
func archInitCastagnoli() {
|
||||
if !sse42 {
|
||||
return true
|
||||
panic("arch-specific Castagnoli not available")
|
||||
}
|
||||
castagnoliSSE42TableK1 = new(sse42Table)
|
||||
castagnoliSSE42TableK2 = new(sse42Table)
|
||||
|
|
@ -65,7 +73,6 @@ func castagnoliInitArch() (needGenericTables bool) {
|
|||
castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:])
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the
|
||||
|
|
@ -78,13 +85,9 @@ func castagnoliShift(table *sse42Table, crc uint32) uint32 {
|
|||
table[0][crc&0xFF]
|
||||
}
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if !sse42 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
panic("not available")
|
||||
}
|
||||
|
||||
// This method is inspired from the algorithm in Intel's white paper:
|
||||
|
|
@ -193,24 +196,33 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
|
|||
return ^crc
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
if useFastIEEE && len(p) >= 64 {
|
||||
func archAvailableIEEE() bool {
|
||||
return useFastIEEE
|
||||
}
|
||||
|
||||
var archIeeeTable8 *slicing8Table
|
||||
|
||||
func archInitIEEE() {
|
||||
if !useFastIEEE {
|
||||
panic("not available")
|
||||
}
|
||||
// We still use slicing-by-8 for small buffers.
|
||||
archIeeeTable8 = slicingMakeTable(IEEE)
|
||||
}
|
||||
|
||||
func archUpdateIEEE(crc uint32, p []byte) uint32 {
|
||||
if !useFastIEEE {
|
||||
panic("not available")
|
||||
}
|
||||
|
||||
if len(p) >= 64 {
|
||||
left := len(p) & 15
|
||||
do := len(p) - left
|
||||
crc = ^ieeeCLMUL(^crc, p[:do])
|
||||
if left > 0 {
|
||||
crc = update(crc, IEEETable, p[do:])
|
||||
}
|
||||
p = p[do:]
|
||||
}
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
}
|
||||
|
||||
return update(crc, IEEETable, p)
|
||||
return slicingUpdate(crc, archIeeeTable8, p)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,44 +0,0 @@
|
|||
// Copyright 2009 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package crc32
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCastagnoliSSE42(t *testing.T) {
|
||||
if !sse42 {
|
||||
t.Skip("SSE42 not supported")
|
||||
}
|
||||
|
||||
// Init the SSE42 tables.
|
||||
castagnoliOnce.Do(castagnoliInit)
|
||||
|
||||
// Generate a table to use with the non-SSE version.
|
||||
slicingTable := makeTable8(Castagnoli)
|
||||
|
||||
// The optimized SSE4.2 implementation behaves differently for different
|
||||
// lengths (especially around multiples of K*3). Crosscheck against the
|
||||
// software implementation for various lengths.
|
||||
for _, base := range []int{castagnoliK1, castagnoliK2, castagnoliK1 + castagnoliK2} {
|
||||
for _, baseMult := range []int{2, 3, 5, 6, 9, 30} {
|
||||
for _, variation := range []int{0, 1, 2, 3, 4, 7, 10, 16, 32, 50, 128} {
|
||||
for _, varMult := range []int{-2, -1, +1, +2} {
|
||||
length := base*baseMult + variation*varMult
|
||||
p := make([]byte, length)
|
||||
_, _ = rand.Read(p)
|
||||
crcInit := uint32(rand.Int63())
|
||||
correct := updateSlicingBy8(crcInit, slicingTable, p)
|
||||
result := updateCastagnoli(crcInit, p)
|
||||
if result != correct {
|
||||
t.Errorf("SSE42 implementation = 0x%x want 0x%x (buffer length %d)",
|
||||
result, correct, len(p))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -11,37 +11,31 @@ package crc32
|
|||
// support.
|
||||
func haveSSE42() bool
|
||||
|
||||
// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32
|
||||
// castagnoliSSE42 is defined in crc32_amd64p32.s and uses the SSE4.2 CRC32
|
||||
// instruction.
|
||||
//go:noescape
|
||||
func castagnoliSSE42(crc uint32, p []byte) uint32
|
||||
|
||||
var sse42 = haveSSE42()
|
||||
|
||||
func castagnoliInitArch() (needGenericTables bool) {
|
||||
// We only need the generic implementation tables if we don't have SSE4.2.
|
||||
return !sse42
|
||||
func archAvailableCastagnoli() bool {
|
||||
return sse42
|
||||
}
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if sse42 {
|
||||
return castagnoliSSE42(crc, p)
|
||||
func archInitCastagnoli() {
|
||||
if !sse42 {
|
||||
panic("not available")
|
||||
}
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
// No initialization necessary.
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if !sse42 {
|
||||
panic("not available")
|
||||
}
|
||||
|
||||
return update(crc, IEEETable, p)
|
||||
return castagnoliSSE42(crc, p)
|
||||
}
|
||||
|
||||
func archAvailableIEEE() bool { return false }
|
||||
func archInitIEEE() { panic("not available") }
|
||||
func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
|
||||
|
|
|
|||
|
|
@ -2,32 +2,88 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!amd64p32,!s390x
|
||||
// This file contains CRC32 algorithms that are not specific to any architecture
|
||||
// and don't use hardware acceleration.
|
||||
//
|
||||
// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table.
|
||||
//
|
||||
// The slicing-by-8 algorithm is a faster implementation that uses a bigger
|
||||
// table (8*256*4 bytes).
|
||||
|
||||
package crc32
|
||||
|
||||
// This file contains the generic version of updateCastagnoli which does
|
||||
// slicing-by-8, or uses the fallback for very small sizes.
|
||||
|
||||
func castagnoliInitArch() (needGenericTables bool) {
|
||||
return true
|
||||
// simpleMakeTable allocates and constructs a Table for the specified
|
||||
// polynomial. The table is suitable for use with the simple algorithm
|
||||
// (simpleUpdate).
|
||||
func simpleMakeTable(poly uint32) *Table {
|
||||
t := new(Table)
|
||||
simplePopulateTable(poly, t)
|
||||
return t
|
||||
}
|
||||
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
// simplePopulateTable constructs a Table for the specified polynomial, suitable
|
||||
// for use with simpleUpdate.
|
||||
func simplePopulateTable(poly uint32, t *Table) {
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := uint32(i)
|
||||
for j := 0; j < 8; j++ {
|
||||
if crc&1 == 1 {
|
||||
crc = (crc >> 1) ^ poly
|
||||
} else {
|
||||
crc >>= 1
|
||||
}
|
||||
}
|
||||
t[i] = crc
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
}
|
||||
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
// simpleUpdate uses the simple algorithm to update the CRC, given a table that
|
||||
// was previously computed using simpleMakeTable.
|
||||
func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 {
|
||||
crc = ^crc
|
||||
for _, v := range p {
|
||||
crc = tab[byte(crc)^v] ^ (crc >> 8)
|
||||
}
|
||||
return update(crc, IEEETable, p)
|
||||
return ^crc
|
||||
}
|
||||
|
||||
// Use slicing-by-8 when payload >= this value.
|
||||
const slicing8Cutoff = 16
|
||||
|
||||
// slicing8Table is array of 8 Tables, used by the slicing-by-8 algorithm.
|
||||
type slicing8Table [8]Table
|
||||
|
||||
// slicingMakeTable constructs a slicing8Table for the specified polynomial. The
|
||||
// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate).
|
||||
func slicingMakeTable(poly uint32) *slicing8Table {
|
||||
t := new(slicing8Table)
|
||||
simplePopulateTable(poly, &t[0])
|
||||
for i := 0; i < 256; i++ {
|
||||
crc := t[0][i]
|
||||
for j := 1; j < 8; j++ {
|
||||
crc = t[0][crc&0xFF] ^ (crc >> 8)
|
||||
t[j][i] = crc
|
||||
}
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a
|
||||
// table that was previously computed using slicingMakeTable.
|
||||
func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 {
|
||||
if len(p) >= slicing8Cutoff {
|
||||
crc = ^crc
|
||||
for len(p) > 8 {
|
||||
crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
|
||||
crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
|
||||
tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
|
||||
tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
|
||||
p = p[8:]
|
||||
}
|
||||
crc = ^crc
|
||||
}
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
return simpleUpdate(crc, &tab[0], p)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2011 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !amd64,!amd64p32,!s390x
|
||||
|
||||
package crc32
|
||||
|
||||
func archAvailableIEEE() bool { return false }
|
||||
func archInitIEEE() { panic("not available") }
|
||||
func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
|
||||
|
||||
func archAvailableCastagnoli() bool { return false }
|
||||
func archInitCastagnoli() { panic("not available") }
|
||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 { panic("not available") }
|
||||
|
|
@ -25,62 +25,65 @@ func vectorizedCastagnoli(crc uint32, p []byte) uint32
|
|||
//go:noescape
|
||||
func vectorizedIEEE(crc uint32, p []byte) uint32
|
||||
|
||||
func castagnoliInitArch() (needGenericTables bool) {
|
||||
return true
|
||||
func archAvailableCastagnoli() bool {
|
||||
return hasVX
|
||||
}
|
||||
|
||||
func genericCastagnoli(crc uint32, p []byte) uint32 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
return updateSlicingBy8(crc, castagnoliTable8, p)
|
||||
var archCastagnoliTable8 *slicing8Table
|
||||
|
||||
func archInitCastagnoli() {
|
||||
if !hasVX {
|
||||
panic("not available")
|
||||
}
|
||||
return update(crc, castagnoliTable, p)
|
||||
// We still use slicing-by-8 for small buffers.
|
||||
archCastagnoliTable8 = slicingMakeTable(Castagnoli)
|
||||
}
|
||||
|
||||
func genericIEEE(crc uint32, p []byte) uint32 {
|
||||
// Use slicing-by-8 on larger inputs.
|
||||
if len(p) >= sliceBy8Cutoff {
|
||||
ieeeTable8Once.Do(func() {
|
||||
ieeeTable8 = makeTable8(IEEE)
|
||||
})
|
||||
return updateSlicingBy8(crc, ieeeTable8, p)
|
||||
// archUpdateCastagnoli calculates the checksum of p using
|
||||
// vectorizedCastagnoli.
|
||||
func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
if !hasVX {
|
||||
panic("not available")
|
||||
}
|
||||
return update(crc, IEEETable, p)
|
||||
}
|
||||
|
||||
// updateCastagnoli calculates the checksum of p using
|
||||
// vectorizedCastagnoli if possible and falling back onto
|
||||
// genericCastagnoli as needed.
|
||||
func updateCastagnoli(crc uint32, p []byte) uint32 {
|
||||
// Use vectorized function if vector facility is available and
|
||||
// data length is above threshold.
|
||||
if hasVX && len(p) >= vxMinLen {
|
||||
// Use vectorized function if data length is above threshold.
|
||||
if len(p) >= vxMinLen {
|
||||
aligned := len(p) & ^vxAlignMask
|
||||
crc = vectorizedCastagnoli(crc, p[:aligned])
|
||||
p = p[aligned:]
|
||||
// process remaining data
|
||||
if len(p) > 0 {
|
||||
crc = genericCastagnoli(crc, p)
|
||||
}
|
||||
}
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
return genericCastagnoli(crc, p)
|
||||
return slicingUpdate(crc, archCastagnoliTable8, p)
|
||||
}
|
||||
|
||||
// updateIEEE calculates the checksum of p using vectorizedIEEE if
|
||||
// possible and falling back onto genericIEEE as needed.
|
||||
func updateIEEE(crc uint32, p []byte) uint32 {
|
||||
// Use vectorized function if vector facility is available and
|
||||
// data length is above threshold.
|
||||
if hasVX && len(p) >= vxMinLen {
|
||||
func archAvailableIEEE() bool {
|
||||
return hasVX
|
||||
}
|
||||
|
||||
var archIeeeTable8 *slicing8Table
|
||||
|
||||
func archInitIEEE() {
|
||||
if !hasVX {
|
||||
panic("not available")
|
||||
}
|
||||
// We still use slicing-by-8 for small buffers.
|
||||
archIeeeTable8 = slicingMakeTable(IEEE)
|
||||
}
|
||||
|
||||
// archUpdateIEEE calculates the checksum of p using vectorizedIEEE.
|
||||
func archUpdateIEEE(crc uint32, p []byte) uint32 {
|
||||
if !hasVX {
|
||||
panic("not available")
|
||||
}
|
||||
// Use vectorized function if data length is above threshold.
|
||||
if len(p) >= vxMinLen {
|
||||
aligned := len(p) & ^vxAlignMask
|
||||
crc = vectorizedIEEE(crc, p[:aligned])
|
||||
p = p[aligned:]
|
||||
// process remaining data
|
||||
if len(p) > 0 {
|
||||
crc = genericIEEE(crc, p)
|
||||
}
|
||||
}
|
||||
if len(p) == 0 {
|
||||
return crc
|
||||
}
|
||||
return genericIEEE(crc, p)
|
||||
return slicingUpdate(crc, archIeeeTable8, p)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ package crc32
|
|||
|
||||
import (
|
||||
"hash"
|
||||
"io"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
|
@ -49,42 +49,150 @@ var golden = []test{
|
|||
{0x8e0bb443, 0xdcded527, "How can you write a big system without C++? -Paul Glick"},
|
||||
}
|
||||
|
||||
// testGoldenIEEE verifies that the given function returns
|
||||
// correct IEEE checksums.
|
||||
func testGoldenIEEE(t *testing.T, crcFunc func(b []byte) uint32) {
|
||||
for _, g := range golden {
|
||||
if crc := crcFunc([]byte(g.in)); crc != g.ieee {
|
||||
t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, crc, g.ieee)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// testGoldenCastagnoli verifies that the given function returns
|
||||
// correct IEEE checksums.
|
||||
func testGoldenCastagnoli(t *testing.T, crcFunc func(b []byte) uint32) {
|
||||
for _, g := range golden {
|
||||
if crc := crcFunc([]byte(g.in)); crc != g.castagnoli {
|
||||
t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, crc, g.castagnoli)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// testCrossCheck generates random buffers of various lengths and verifies that
|
||||
// the two "update" functions return the same result.
|
||||
func testCrossCheck(t *testing.T, crcFunc1, crcFunc2 func(crc uint32, b []byte) uint32) {
|
||||
// The AMD64 implementation has some cutoffs at lengths 168*3=504 and
|
||||
// 1344*3=4032. We should make sure lengths around these values are in the
|
||||
// list.
|
||||
lengths := []int{0, 1, 2, 3, 4, 5, 10, 16, 50, 100, 128,
|
||||
500, 501, 502, 503, 504, 505, 512, 1000, 1024, 2000,
|
||||
4030, 4031, 4032, 4033, 4036, 4040, 4048, 4096, 5000, 10000}
|
||||
for _, length := range lengths {
|
||||
p := make([]byte, length)
|
||||
_, _ = rand.Read(p)
|
||||
crcInit := uint32(rand.Int63())
|
||||
crc1 := crcFunc1(crcInit, p)
|
||||
crc2 := crcFunc2(crcInit, p)
|
||||
if crc1 != crc2 {
|
||||
t.Errorf("mismatch: 0x%x vs 0x%x (buffer length %d)", crc1, crc2, length)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestSimple tests the simple generic algorithm.
|
||||
func TestSimple(t *testing.T) {
|
||||
tab := simpleMakeTable(IEEE)
|
||||
testGoldenIEEE(t, func(b []byte) uint32 {
|
||||
return simpleUpdate(0, tab, b)
|
||||
})
|
||||
|
||||
tab = simpleMakeTable(Castagnoli)
|
||||
testGoldenCastagnoli(t, func(b []byte) uint32 {
|
||||
return simpleUpdate(0, tab, b)
|
||||
})
|
||||
}
|
||||
|
||||
// TestSimple tests the slicing-by-8 algorithm.
|
||||
func TestSlicing(t *testing.T) {
|
||||
tab := slicingMakeTable(IEEE)
|
||||
testGoldenIEEE(t, func(b []byte) uint32 {
|
||||
return slicingUpdate(0, tab, b)
|
||||
})
|
||||
|
||||
tab = slicingMakeTable(Castagnoli)
|
||||
testGoldenCastagnoli(t, func(b []byte) uint32 {
|
||||
return slicingUpdate(0, tab, b)
|
||||
})
|
||||
|
||||
// Cross-check various polys against the simple algorithm.
|
||||
for _, poly := range []uint32{IEEE, Castagnoli, Koopman, 0xD5828281} {
|
||||
t1 := simpleMakeTable(poly)
|
||||
f1 := func(crc uint32, b []byte) uint32 {
|
||||
return simpleUpdate(crc, t1, b)
|
||||
}
|
||||
t2 := slicingMakeTable(poly)
|
||||
f2 := func(crc uint32, b []byte) uint32 {
|
||||
return slicingUpdate(crc, t2, b)
|
||||
}
|
||||
testCrossCheck(t, f1, f2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestArchIEEE(t *testing.T) {
|
||||
if !archAvailableIEEE() {
|
||||
t.Skip("Arch-specific IEEE not available.")
|
||||
}
|
||||
archInitIEEE()
|
||||
slicingTable := slicingMakeTable(IEEE)
|
||||
testCrossCheck(t, archUpdateIEEE, func(crc uint32, b []byte) uint32 {
|
||||
return slicingUpdate(crc, slicingTable, b)
|
||||
})
|
||||
}
|
||||
|
||||
func TestArchCastagnoli(t *testing.T) {
|
||||
if !archAvailableCastagnoli() {
|
||||
t.Skip("Arch-specific Castagnoli not available.")
|
||||
}
|
||||
archInitCastagnoli()
|
||||
slicingTable := slicingMakeTable(Castagnoli)
|
||||
testCrossCheck(t, archUpdateCastagnoli, func(crc uint32, b []byte) uint32 {
|
||||
return slicingUpdate(crc, slicingTable, b)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGolden(t *testing.T) {
|
||||
testGoldenIEEE(t, ChecksumIEEE)
|
||||
|
||||
// Some implementations have special code to deal with misaligned
|
||||
// data; test that as well.
|
||||
for delta := 1; delta <= 7; delta++ {
|
||||
testGoldenIEEE(t, func(b []byte) uint32 {
|
||||
ieee := NewIEEE()
|
||||
d := delta
|
||||
if d >= len(b) {
|
||||
d = len(b)
|
||||
}
|
||||
ieee.Write(b[:d])
|
||||
ieee.Write(b[d:])
|
||||
return ieee.Sum32()
|
||||
})
|
||||
}
|
||||
|
||||
castagnoliTab := MakeTable(Castagnoli)
|
||||
if castagnoliTab == nil {
|
||||
t.Errorf("nil Castagnoli Table")
|
||||
}
|
||||
|
||||
for _, g := range golden {
|
||||
ieee := NewIEEE()
|
||||
io.WriteString(ieee, g.in)
|
||||
s := ieee.Sum32()
|
||||
if s != g.ieee {
|
||||
t.Errorf("IEEE(%s) = 0x%x want 0x%x", g.in, s, g.ieee)
|
||||
}
|
||||
|
||||
testGoldenCastagnoli(t, func(b []byte) uint32 {
|
||||
castagnoli := New(castagnoliTab)
|
||||
io.WriteString(castagnoli, g.in)
|
||||
s = castagnoli.Sum32()
|
||||
if s != g.castagnoli {
|
||||
t.Errorf("Castagnoli(%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
|
||||
}
|
||||
castagnoli.Write(b)
|
||||
return castagnoli.Sum32()
|
||||
})
|
||||
|
||||
// The SSE4.2 implementation of this has code to deal
|
||||
// with misaligned data so we ensure that we test that
|
||||
// too.
|
||||
for delta := 1; delta <= 7; delta++ {
|
||||
if len(g.in) > delta {
|
||||
in := []byte(g.in)
|
||||
castagnoli = New(castagnoliTab)
|
||||
castagnoli.Write(in[:delta])
|
||||
castagnoli.Write(in[delta:])
|
||||
s = castagnoli.Sum32()
|
||||
if s != g.castagnoli {
|
||||
t.Errorf("Castagnoli[misaligned](%s) = 0x%x want 0x%x", g.in, s, g.castagnoli)
|
||||
}
|
||||
// Some implementations have special code to deal with misaligned
|
||||
// data; test that as well.
|
||||
for delta := 1; delta <= 7; delta++ {
|
||||
testGoldenCastagnoli(t, func(b []byte) uint32 {
|
||||
castagnoli := New(castagnoliTab)
|
||||
d := delta
|
||||
if d >= len(b) {
|
||||
d = len(b)
|
||||
}
|
||||
}
|
||||
castagnoli.Write(b[:d])
|
||||
castagnoli.Write(b[d:])
|
||||
return castagnoli.Sum32()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue