mirror of https://github.com/golang/go.git
compress/flate: level 1 (best speed) match across blocks
This change makes deflate level 1 (best speed) match across block boundaries. This comes at a small speed penalty, but improves compression on almost all output. Sample numbers on various content types: enwik9: 391052014 -> 382578469 bytes, 77.59 -> 74.28 MB/s adresser.001: 57269799 -> 47756095 bytes, 287.84 -> 357.86 MB/s 10gb: 5233055166 -> 5198328382 bytes, 105.85 -> 96.99 MB/s rawstudio-mint14: 3972329211 -> 3927423364 bytes, 100.07 -> 94.22 MB/s sites: 165556800 -> 163178702 bytes, 72.31 -> 70.15 MB/s objectfiles: 115962472 -> 111649524 bytes, 132.60 -> 128.05 MB/s sharnd.out: 200015283 -> 200015283 bytes, 221.50 -> 218.83 MB/s Change-Id: I62a139e5c06976e803439a4268acede5139b8cfc Reviewed-on: https://go-review.googlesource.com/31640 Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com> Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
parent
f8a3444388
commit
2e196b15b9
|
|
@ -84,9 +84,10 @@ type compressor struct {
|
|||
bulkHasher func([]byte, []uint32)
|
||||
|
||||
// compression algorithm
|
||||
fill func(*compressor, []byte) int // copy data to window
|
||||
step func(*compressor) // process window
|
||||
sync bool // requesting flush
|
||||
fill func(*compressor, []byte) int // copy data to window
|
||||
step func(*compressor) // process window
|
||||
sync bool // requesting flush
|
||||
bestSpeed *deflateFast // Encoder for BestSpeed
|
||||
|
||||
// Input hash chains
|
||||
// hashHead[hashValue] contains the largest inputIndex with the specified hash value
|
||||
|
|
@ -346,12 +347,13 @@ func (d *compressor) encSpeed() {
|
|||
d.err = d.w.err
|
||||
}
|
||||
d.windowEnd = 0
|
||||
d.bestSpeed.reset()
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
// Encode the block.
|
||||
d.tokens = encodeBestSpeed(d.tokens[:0], d.window[:d.windowEnd])
|
||||
d.tokens = d.bestSpeed.encode(d.tokens[:0], d.window[:d.windowEnd])
|
||||
|
||||
// If we removed less than 1/16th, Huffman compress the block.
|
||||
if len(d.tokens) > d.windowEnd-(d.windowEnd>>4) {
|
||||
|
|
@ -584,6 +586,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
|
|||
d.window = make([]byte, maxStoreBlockSize)
|
||||
d.fill = (*compressor).fillStore
|
||||
d.step = (*compressor).encSpeed
|
||||
d.bestSpeed = newDeflateFast()
|
||||
d.tokens = make([]token, maxStoreBlockSize)
|
||||
case level == DefaultCompression:
|
||||
level = 6
|
||||
|
|
@ -609,6 +612,7 @@ func (d *compressor) reset(w io.Writer) {
|
|||
case BestSpeed:
|
||||
d.windowEnd = 0
|
||||
d.tokens = d.tokens[:0]
|
||||
d.bestSpeed.reset()
|
||||
default:
|
||||
d.chainHead = -1
|
||||
for i := range d.hashHead {
|
||||
|
|
|
|||
|
|
@ -490,6 +490,7 @@ func TestWriterReset(t *testing.T) {
|
|||
w.d.fill, wref.d.fill = nil, nil
|
||||
w.d.step, wref.d.step = nil, nil
|
||||
w.d.bulkHasher, wref.d.bulkHasher = nil, nil
|
||||
w.d.bestSpeed, wref.d.bestSpeed = nil, nil
|
||||
// hashMatch is always overwritten when used.
|
||||
copy(w.d.hashMatch[:], wref.d.hashMatch[:])
|
||||
if len(w.d.tokens) != 0 {
|
||||
|
|
@ -681,3 +682,119 @@ func TestWriterPersistentError(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBestSpeedMatch(t *testing.T) {
|
||||
cases := []struct {
|
||||
previous, current []byte
|
||||
t, s, want int32
|
||||
}{{
|
||||
previous: []byte{0, 0, 0, 1, 2},
|
||||
current: []byte{3, 4, 5, 0, 1, 2, 3, 4, 5},
|
||||
t: -3,
|
||||
s: 3,
|
||||
want: 6,
|
||||
}, {
|
||||
previous: []byte{0, 0, 0, 1, 2},
|
||||
current: []byte{2, 4, 5, 0, 1, 2, 3, 4, 5},
|
||||
t: -3,
|
||||
s: 3,
|
||||
want: 3,
|
||||
}, {
|
||||
previous: []byte{0, 0, 0, 1, 1},
|
||||
current: []byte{3, 4, 5, 0, 1, 2, 3, 4, 5},
|
||||
t: -3,
|
||||
s: 3,
|
||||
want: 2,
|
||||
}, {
|
||||
previous: []byte{0, 0, 0, 1, 2},
|
||||
current: []byte{2, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: -1,
|
||||
s: 0,
|
||||
want: 4,
|
||||
}, {
|
||||
previous: []byte{0, 0, 0, 1, 2, 3, 4, 5, 2, 2},
|
||||
current: []byte{2, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: -7,
|
||||
s: 4,
|
||||
want: 5,
|
||||
}, {
|
||||
previous: []byte{9, 9, 9, 9, 9},
|
||||
current: []byte{2, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: -1,
|
||||
s: 0,
|
||||
want: 0,
|
||||
}, {
|
||||
previous: []byte{9, 9, 9, 9, 9},
|
||||
current: []byte{9, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: 0,
|
||||
s: 1,
|
||||
want: 0,
|
||||
}, {
|
||||
previous: []byte{},
|
||||
current: []byte{9, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: -5,
|
||||
s: 1,
|
||||
want: 0,
|
||||
}, {
|
||||
previous: []byte{},
|
||||
current: []byte{9, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: -1,
|
||||
s: 1,
|
||||
want: 0,
|
||||
}, {
|
||||
previous: []byte{},
|
||||
current: []byte{2, 2, 2, 2, 1, 2, 3, 4, 5},
|
||||
t: 0,
|
||||
s: 1,
|
||||
want: 3,
|
||||
}, {
|
||||
previous: []byte{3, 4, 5},
|
||||
current: []byte{3, 4, 5},
|
||||
t: -3,
|
||||
s: 0,
|
||||
want: 3,
|
||||
}, {
|
||||
previous: make([]byte, 1000),
|
||||
current: make([]byte, 1000),
|
||||
t: -1000,
|
||||
s: 0,
|
||||
want: maxMatchLength - 4,
|
||||
}, {
|
||||
previous: make([]byte, 200),
|
||||
current: make([]byte, 500),
|
||||
t: -200,
|
||||
s: 0,
|
||||
want: maxMatchLength - 4,
|
||||
}, {
|
||||
previous: make([]byte, 200),
|
||||
current: make([]byte, 500),
|
||||
t: 0,
|
||||
s: 1,
|
||||
want: maxMatchLength - 4,
|
||||
}, {
|
||||
previous: make([]byte, maxMatchLength-4),
|
||||
current: make([]byte, 500),
|
||||
t: -(maxMatchLength - 4),
|
||||
s: 0,
|
||||
want: maxMatchLength - 4,
|
||||
}, {
|
||||
previous: make([]byte, 200),
|
||||
current: make([]byte, 500),
|
||||
t: -200,
|
||||
s: 400,
|
||||
want: 100,
|
||||
}, {
|
||||
previous: make([]byte, 10),
|
||||
current: make([]byte, 500),
|
||||
t: 200,
|
||||
s: 400,
|
||||
want: 100,
|
||||
}}
|
||||
for i, c := range cases {
|
||||
e := deflateFast{prev: c.previous}
|
||||
got := e.matchLen(c.s, c.t, c.current)
|
||||
if got != c.want {
|
||||
t.Errorf("Test %d: match length, want %d, got %d", i, c.want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,12 +14,12 @@ const (
|
|||
tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
|
||||
)
|
||||
|
||||
func load32(b []byte, i int) uint32 {
|
||||
func load32(b []byte, i int32) uint32 {
|
||||
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
|
||||
}
|
||||
|
||||
func load64(b []byte, i int) uint64 {
|
||||
func load64(b []byte, i int32) uint64 {
|
||||
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
|
||||
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
|
||||
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
|
||||
|
|
@ -38,31 +38,49 @@ const (
|
|||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||
)
|
||||
|
||||
func encodeBestSpeed(dst []token, src []byte) []token {
|
||||
type tableEntry struct {
|
||||
val uint32 // Value at destination
|
||||
offset int32
|
||||
}
|
||||
|
||||
// deflateFast maintains the table for matches,
|
||||
// and the previous byte block for cross block matching.
|
||||
type deflateFast struct {
|
||||
table [tableSize]tableEntry
|
||||
prev []byte // Previous block, zero length if unknown.
|
||||
cur int32 // Current match offset.
|
||||
}
|
||||
|
||||
func newDeflateFast() *deflateFast {
|
||||
return &deflateFast{cur: maxStoreBlockSize, prev: make([]byte, 0, maxStoreBlockSize)}
|
||||
}
|
||||
|
||||
// encode encodes a block given in src and encodes tokens
|
||||
// to dst and returns the result.
|
||||
func (e *deflateFast) encode(dst []token, src []byte) []token {
|
||||
// Ensure that e.cur doesn't wrap.
|
||||
if e.cur > 1<<30 {
|
||||
*e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]}
|
||||
}
|
||||
|
||||
// This check isn't in the Snappy implementation, but there, the caller
|
||||
// instead of the callee handles this case.
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
e.cur += maxStoreBlockSize
|
||||
e.prev = e.prev[:0]
|
||||
return emitLiteral(dst, src)
|
||||
}
|
||||
|
||||
// Initialize the hash table.
|
||||
//
|
||||
// The table element type is uint16, as s < sLimit and sLimit < len(src)
|
||||
// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
|
||||
var table [tableSize]uint16
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
sLimit := int32(len(src) - inputMargin)
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
nextHash := hash(load32(src, s))
|
||||
nextEmit := int32(0)
|
||||
s := int32(0)
|
||||
cv := load32(src, s)
|
||||
nextHash := hash(cv)
|
||||
|
||||
for {
|
||||
// Copied from the C++ snappy implementation:
|
||||
|
|
@ -80,10 +98,10 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||
// The "skip" variable keeps track of how many bytes there are since
|
||||
// the last match; dividing it by 32 (ie. right-shifting by five) gives
|
||||
// the number of bytes to move ahead for each iteration.
|
||||
skip := 32
|
||||
skip := int32(32)
|
||||
|
||||
nextS := s
|
||||
candidate := 0
|
||||
var candidate tableEntry
|
||||
for {
|
||||
s = nextS
|
||||
bytesBetweenHashLookups := skip >> 5
|
||||
|
|
@ -92,13 +110,19 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
candidate = int(table[nextHash&tableMask])
|
||||
table[nextHash&tableMask] = uint16(s)
|
||||
nextHash = hash(load32(src, nextS))
|
||||
// TODO: < should be <=, and add a test for that.
|
||||
if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
|
||||
break
|
||||
candidate = e.table[nextHash&tableMask]
|
||||
now := load32(src, nextS)
|
||||
e.table[nextHash&tableMask] = tableEntry{offset: s + e.cur, val: cv}
|
||||
nextHash = hash(now)
|
||||
|
||||
offset := s - (candidate.offset - e.cur)
|
||||
// TODO: >= should be >, and add a test for that.
|
||||
if offset >= maxMatchOffset || cv != candidate.val {
|
||||
// Out of range or not matched.
|
||||
cv = now
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
|
|
@ -117,22 +141,16 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||
for {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
base := s
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
//
|
||||
// This is an inlined version of Snappy's:
|
||||
// s = extendMatch(src, candidate+4, s+4)
|
||||
s += 4
|
||||
s1 := base + maxMatchLength
|
||||
if s1 > len(src) {
|
||||
s1 = len(src)
|
||||
}
|
||||
for i := candidate + 4; s < s1 && src[i] == src[s]; i, s = i+1, s+1 {
|
||||
}
|
||||
t := candidate.offset - e.cur + 4
|
||||
l := e.matchLen(s, t, src)
|
||||
|
||||
// matchToken is flate's equivalent of Snappy's emitCopy.
|
||||
dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
|
||||
// matchToken is flate's equivalent of Snappy's emitCopy. (length,offset)
|
||||
dst = append(dst, matchToken(uint32(l+4-baseMatchLength), uint32(s-t-baseMatchOffset)))
|
||||
s += l
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
|
|
@ -145,14 +163,18 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||
// are faster as one load64 call (with some shifts) instead of
|
||||
// three load32 calls.
|
||||
x := load64(src, s-1)
|
||||
prevHash := hash(uint32(x >> 0))
|
||||
table[prevHash&tableMask] = uint16(s - 1)
|
||||
currHash := hash(uint32(x >> 8))
|
||||
candidate = int(table[currHash&tableMask])
|
||||
table[currHash&tableMask] = uint16(s)
|
||||
prevHash := hash(uint32(x))
|
||||
e.table[prevHash&tableMask] = tableEntry{offset: e.cur + s - 1, val: uint32(x)}
|
||||
x >>= 8
|
||||
currHash := hash(uint32(x))
|
||||
candidate = e.table[currHash&tableMask]
|
||||
e.table[currHash&tableMask] = tableEntry{offset: e.cur + s, val: uint32(x)}
|
||||
|
||||
offset := s - (candidate.offset - e.cur)
|
||||
// TODO: >= should be >, and add a test for that.
|
||||
if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
|
||||
nextHash = hash(uint32(x >> 16))
|
||||
if offset >= maxMatchOffset || uint32(x) != candidate.val {
|
||||
cv = uint32(x >> 8)
|
||||
nextHash = hash(cv)
|
||||
s++
|
||||
break
|
||||
}
|
||||
|
|
@ -160,15 +182,91 @@ func encodeBestSpeed(dst []token, src []byte) []token {
|
|||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
if int(nextEmit) < len(src) {
|
||||
dst = emitLiteral(dst, src[nextEmit:])
|
||||
}
|
||||
e.cur += int32(len(src))
|
||||
e.prev = e.prev[:len(src)]
|
||||
copy(e.prev, src)
|
||||
return dst
|
||||
}
|
||||
|
||||
func emitLiteral(dst []token, lit []byte) []token {
|
||||
for _, v := range lit {
|
||||
dst = append(dst, token(v))
|
||||
dst = append(dst, literalToken(uint32(v)))
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// matchLen returns the match length between src[s:] and src[t:].
|
||||
// t can be negative to indicate the match is starting in e.prev.
|
||||
// We assume that src[s-4:s] and src[t-4:t] already match.
|
||||
func (e *deflateFast) matchLen(s, t int32, src []byte) int32 {
|
||||
s1 := int(s) + maxMatchLength - 4
|
||||
if s1 > len(src) {
|
||||
s1 = len(src)
|
||||
}
|
||||
|
||||
// If we are inside the current block
|
||||
if t >= 0 {
|
||||
b := src[t:]
|
||||
a := src[s:s1]
|
||||
b = b[:len(a)]
|
||||
// Extend the match to be as long as possible.
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int32(i)
|
||||
}
|
||||
}
|
||||
return int32(len(a))
|
||||
}
|
||||
|
||||
// We found a match in the previous block.
|
||||
tp := int32(len(e.prev)) + t
|
||||
if tp < 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Extend the match to be as long as possible.
|
||||
a := src[s:s1]
|
||||
b := e.prev[tp:]
|
||||
if len(b) > len(a) {
|
||||
b = b[:len(a)]
|
||||
}
|
||||
a = a[:len(b)]
|
||||
for i := range b {
|
||||
if a[i] != b[i] {
|
||||
return int32(i)
|
||||
}
|
||||
}
|
||||
|
||||
// If we reached our limit, we matched everything we are
|
||||
// allowed to in the previous block and we return.
|
||||
n := int32(len(b))
|
||||
if int(s+n) == s1 {
|
||||
return n
|
||||
}
|
||||
|
||||
// Continue looking for more matches in the current block.
|
||||
a = src[s+n : s1]
|
||||
b = src[:len(a)]
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int32(i) + n
|
||||
}
|
||||
}
|
||||
return int32(len(a)) + n
|
||||
}
|
||||
|
||||
// Reset resets the encoding history.
|
||||
// This ensures that no matches are made to the previous block.
|
||||
func (e *deflateFast) reset() {
|
||||
e.prev = e.prev[:0]
|
||||
// Bump the offset, so all matches will fail distance check.
|
||||
e.cur += maxMatchOffset
|
||||
|
||||
// Protect against e.cur wraparound.
|
||||
if e.cur > 1<<30 {
|
||||
*e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue