diff --git a/src/hash/crc32/crc32_s390x.go b/src/hash/crc32/crc32_s390x.go index 2f20690389..befb58f55f 100644 --- a/src/hash/crc32/crc32_s390x.go +++ b/src/hash/crc32/crc32_s390x.go @@ -4,14 +4,9 @@ package crc32 -import ( - "unsafe" -) - const ( vxMinLen = 64 - vxAlignment = 16 - vxAlignMask = vxAlignment - 1 + vxAlignMask = 15 // align to 16 bytes ) // hasVectorFacility reports whether the machine has the z/Architecture @@ -49,20 +44,13 @@ func genericIEEE(crc uint32, p []byte) uint32 { return update(crc, IEEETable, p) } -// updateCastagnoli calculates the checksum of p using genericCastagnoli to -// align the data appropriately for vectorCastagnoli. It avoids using -// vectorCastagnoli entirely if the length of p is less than or equal to -// vxMinLen. +// updateCastagnoli calculates the checksum of p using +// vectorizedCastagnoli if possible and falling back onto +// genericCastagnoli as needed. func updateCastagnoli(crc uint32, p []byte) uint32 { // Use vectorized function if vector facility is available and // data length is above threshold. - if hasVX && len(p) > vxMinLen { - pAddr := uintptr(unsafe.Pointer(&p[0])) - if pAddr&vxAlignMask != 0 { - prealign := vxAlignment - int(pAddr&vxAlignMask) - crc = genericCastagnoli(crc, p[:prealign]) - p = p[prealign:] - } + if hasVX && len(p) >= vxMinLen { aligned := len(p) & ^vxAlignMask crc = vectorizedCastagnoli(crc, p[:aligned]) p = p[aligned:] @@ -75,19 +63,12 @@ func updateCastagnoli(crc uint32, p []byte) uint32 { return genericCastagnoli(crc, p) } -// updateIEEE calculates the checksum of p using genericIEEE to align the data -// appropriately for vectorIEEE. It avoids using vectorIEEE entirely if the length -// of p is less than or equal to vxMinLen. +// updateIEEE calculates the checksum of p using vectorizedIEEE if +// possible and falling back onto genericIEEE as needed. func updateIEEE(crc uint32, p []byte) uint32 { // Use vectorized function if vector facility is available and // data length is above threshold. - if hasVX && len(p) > vxMinLen { - pAddr := uintptr(unsafe.Pointer(&p[0])) - if pAddr&vxAlignMask != 0 { - prealign := vxAlignment - int(pAddr&vxAlignMask) - crc = genericIEEE(crc, p[:prealign]) - p = p[prealign:] - } + if hasVX && len(p) >= vxMinLen { aligned := len(p) & ^vxAlignMask crc = vectorizedIEEE(crc, p[:aligned]) p = p[aligned:] diff --git a/src/hash/crc32/crc32_s390x.s b/src/hash/crc32/crc32_s390x.s index f8d39f3df9..0b830531f7 100644 --- a/src/hash/crc32/crc32_s390x.s +++ b/src/hash/crc32/crc32_s390x.s @@ -128,6 +128,10 @@ TEXT vectorizedBody<>(SB),NOSPLIT,$0 VZERO V0 VLVGF $3, R2, V0 + // Crash if the input size is less than 64-bytes. + CMP R4, $64 + BLT crash + // Load a 64-byte data chunk and XOR with CRC VLM 0(R3), V1, V4 // 64-bytes into V1..V4 @@ -243,3 +247,6 @@ done: XOR $0xffffffff, R2 // NOTW R2 MOVWZ R2, ret + 32(FP) RET + +crash: + MOVD $0, (R0) // input size is less than 64-bytes