diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go
index 199fc4cf3c..428f2508d3 100644
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -13,14 +13,13 @@ import (
 const (
 	NoCompression      = 0
 	BestSpeed          = 1
-	fastCompression    = 3
 	BestCompression    = 9
 	DefaultCompression = -1
 	logWindowSize      = 15
 	windowSize         = 1 << logWindowSize
 	windowMask         = windowSize - 1
 	logMaxOffsetSize   = 15  // Standard DEFLATE
-	minMatchLength     = 3   // The smallest match that the compressor looks for
+	minMatchLength     = 4   // The smallest match that the compressor looks for
 	maxMatchLength     = 258 // The longest match for the compressor
 	minOffsetSize      = 1   // The shortest offset that makes any sense
 
@@ -28,39 +27,39 @@ const (
 	// stop things from getting too large.
 	maxFlateBlockTokens = 1 << 14
 	maxStoreBlockSize   = 65535
-	hashBits            = 17
+	hashBits            = 17 // After 17 performance degrades
 	hashSize            = 1 << hashBits
 	hashMask            = (1 << hashBits) - 1
-	hashShift           = (hashBits + minMatchLength - 1) / minMatchLength
 	maxHashOffset       = 1 << 24
 
 	skipNever = math.MaxInt32
 )
 
 type compressionLevel struct {
-	good, lazy, nice, chain, fastSkipHashing int
+	level, good, lazy, nice, chain, fastSkipHashing int
 }
 
 var levels = []compressionLevel{
 	{}, // 0
 	// For levels 1-3 we don't bother trying with lazy matches
-	{3, 0, 8, 4, 4},
-	{3, 0, 16, 8, 5},
-	{3, 0, 32, 32, 6},
+	{1, 4, 0, 8, 4, 4},
+	{2, 4, 0, 16, 8, 5},
+	{3, 4, 0, 32, 32, 6},
 	// Levels 4-9 use increasingly more lazy matching
 	// and increasingly stringent conditions for "good enough".
-	{4, 4, 16, 16, skipNever},
-	{8, 16, 32, 32, skipNever},
-	{8, 16, 128, 128, skipNever},
-	{8, 32, 128, 256, skipNever},
-	{32, 128, 258, 1024, skipNever},
-	{32, 258, 258, 4096, skipNever},
+	{4, 4, 4, 16, 16, skipNever},
+	{5, 8, 16, 32, 32, skipNever},
+	{6, 8, 16, 128, 128, skipNever},
+	{7, 8, 32, 128, 256, skipNever},
+	{8, 32, 128, 258, 1024, skipNever},
+	{9, 32, 258, 258, 4096, skipNever},
 }
 
 type compressor struct {
 	compressionLevel
 
-	w *huffmanBitWriter
+	w          *huffmanBitWriter
+	bulkHasher func([]byte, []uint32)
 
 	// compression algorithm
 	fill func(*compressor, []byte) int // copy data to window
@@ -73,8 +72,8 @@ type compressor struct {
 	// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
 	// with the same hash value.
 	chainHead  int
-	hashHead   []int
-	hashPrev   []int
+	hashHead   []uint32
+	hashPrev   []uint32
 	hashOffset int
 
 	// input window: unprocessed data is window[index:windowEnd]
@@ -90,9 +89,12 @@ type compressor struct {
 	// deflate state
 	length         int
 	offset         int
-	hash           int
+	hash           uint32
 	maxInsertIndex int
 	err            error
+
+	// hashMatch must be able to contain hashes for the maximum match length.
+	hashMatch [maxMatchLength - 1]uint32
 }
 
 func (d *compressor) fillDeflate(b []byte) int {
@@ -112,15 +114,15 @@ func (d *compressor) fillDeflate(b []byte) int {
 		d.hashOffset -= delta
 		d.chainHead -= delta
 		for i, v := range d.hashPrev {
-			if v > delta {
-				d.hashPrev[i] -= delta
+			if int(v) > delta {
+				d.hashPrev[i] = uint32(int(v) - delta)
 			} else {
 				d.hashPrev[i] = 0
 			}
 		}
 		for i, v := range d.hashHead {
-			if v > delta {
-				d.hashHead[i] -= delta
+			if int(v) > delta {
+				d.hashHead[i] = uint32(int(v) - delta)
 			} else {
 				d.hashHead[i] = 0
 			}
@@ -132,19 +134,73 @@ func (d *compressor) fillDeflate(b []byte) int {
 	return n
 }
 
-func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
-	if index > 0 || eof {
+func (d *compressor) writeBlock(tokens []token, index int) error {
+	if index > 0 {
 		var window []byte
 		if d.blockStart <= index {
 			window = d.window[d.blockStart:index]
 		}
 		d.blockStart = index
-		d.w.writeBlock(tokens, eof, window)
+		d.w.writeBlock(tokens, false, window)
 		return d.w.err
 	}
 	return nil
 }
 
+// fillWindow will fill the current window with the supplied
+// dictionary and calculate all hashes.
+// This is much faster than doing a full encode.
+// Should only be used after a reset.
+func (d *compressor) fillWindow(b []byte) {
+	// Do not fill window if we are in store-only mode.
+	if d.compressionLevel.level == 0 {
+		return
+	}
+	if d.index != 0 || d.windowEnd != 0 {
+		panic("internal error: fillWindow called with stale data")
+	}
+
+	// If we are given too much, cut it.
+	if len(b) > windowSize {
+		b = b[len(b)-windowSize:]
+	}
+	// Add all to window.
+	n := copy(d.window, b)
+
+	// Calculate 256 hashes at the time (more L1 cache hits)
+	loops := (n + 256 - minMatchLength) / 256
+	for j := 0; j < loops; j++ {
+		index := j * 256
+		end := index + 256 + minMatchLength - 1
+		if end > n {
+			end = n
+		}
+		toCheck := d.window[index:end]
+		dstSize := len(toCheck) - minMatchLength + 1
+
+		if dstSize <= 0 {
+			continue
+		}
+
+		dst := d.hashMatch[:dstSize]
+		d.bulkHasher(toCheck, dst)
+		var newH uint32
+		for i, val := range dst {
+			di := i + index
+			newH = val & hashMask
+			// Get previous value with the same hash.
+			// Our chain should point to the previous value.
+			d.hashPrev[di&windowMask] = d.hashHead[newH]
+			// Set the head of the hash chain to us.
+			d.hashHead[newH] = uint32(di + d.hashOffset)
+		}
+		d.hash = newH
+	}
+	// Update window information.
+	d.windowEnd = n
+	d.index = n
+}
+
 // Try to find a match starting at index whose length is greater than prevSize.
 // We only look at chainCount possibilities before giving up.
 func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
@@ -168,20 +224,15 @@ func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead
 		tries >>= 2
 	}
 
-	w0 := win[pos]
-	w1 := win[pos+1]
 	wEnd := win[pos+length]
+	wPos := win[pos:]
 	minIndex := pos - windowSize
 
 	for i := prevHead; tries > 0; tries-- {
-		if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
-			// The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
+		if wEnd == win[i+length] {
+			n := matchLen(win[i:], wPos, minMatchLook)
 
-			n := 3
-			for pos+n < len(win) && win[i+n] == win[pos+n] {
-				n++
-			}
-			if n > length && (n > 3 || pos-i <= 4096) {
+			if n > length && (n > minMatchLength || pos-i <= 4096) {
 				length = n
 				offset = pos - i
 				ok = true
@@ -196,7 +247,8 @@
 			// hashPrev[i & windowMask] has already been overwritten, so stop now.
 			break
 		}
-		if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
+		i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+		if i < minIndex || i < 0 {
 			break
 		}
 	}
@@ -211,9 +263,46 @@ func (d *compressor) writeStoredBlock(buf []byte) error {
 	return d.w.err
 }
 
+const hashmul = 0x1e35a7bd
+
+// hash4 returns a hash representation of the first 4 bytes
+// of the supplied slice.
+// The caller must ensure that len(b) >= 4.
+func hash4(b []byte) uint32 {
+	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+}
+
+// bulkHash4 will compute hashes using the same
+// algorithm as hash4
+func bulkHash4(b []byte, dst []uint32) {
+	if len(b) < minMatchLength {
+		return
+	}
+	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+	dst[0] = (hb * hashmul) >> (32 - hashBits)
+	end := len(b) - minMatchLength + 1
+	for i := 1; i < end; i++ {
+		hb = (hb << 8) | uint32(b[i+3])
+		dst[i] = (hb * hashmul) >> (32 - hashBits)
+	}
+}
+
+// matchLen returns the number of matching bytes in a and b
+// up to length 'max'. Both slices must be at least 'max'
+// bytes in size.
+func matchLen(a, b []byte, max int) int {
+	a = a[:max]
+	for i, av := range a {
+		if b[i] != av {
+			return i
+		}
+	}
+	return max
+}
+
 func (d *compressor) initDeflate() {
-	d.hashHead = make([]int, hashSize)
-	d.hashPrev = make([]int, windowSize)
+	d.hashHead = make([]uint32, hashSize)
+	d.hashPrev = make([]uint32, windowSize)
 	d.window = make([]byte, 2*windowSize)
 	d.hashOffset = 1
 	d.tokens = make([]token, 0, maxFlateBlockTokens+1)
@@ -223,6 +312,7 @@ func (d *compressor) initDeflate() {
 	d.index = 0
 	d.hash = 0
 	d.chainHead = -1
+	d.bulkHasher = bulkHash4
 }
 
 func (d *compressor) deflate() {
@@ -232,7 +322,7 @@ func (d *compressor) deflate() {
 	d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
 	if d.index < d.maxInsertIndex {
-		d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
+		d.hash = hash4(d.window[d.index : d.index+minMatchLength])
 	}
 
 Loop:
@@ -253,7 +343,7 @@ func (d *compressor) deflate() {
 					d.byteAvailable = false
 				}
 				if len(d.tokens) > 0 {
-					if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+					if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
 						return
 					}
 					d.tokens = d.tokens[:0]
@@ -266,10 +356,10 @@ Loop:
 		}
 		if d.index < d.maxInsertIndex {
 			// Update the hash
-			d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
-			d.chainHead = d.hashHead[d.hash]
-			d.hashPrev[d.index&windowMask] = d.chainHead
-			d.hashHead[d.hash] = d.index + d.hashOffset
+			d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+			d.chainHead = int(d.hashHead[d.hash])
+			d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
+			d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
 		}
 		prevLength := d.length
 		prevOffset := d.offset
@@ -425,42 +514,29 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 	return nil
 }
 
-var zeroes [32]int
-var bzeroes [256]byte
+// hzeroes is used for zeroing the hash slice.
+var hzeroes [256]uint32
 
 func (d *compressor) reset(w io.Writer) {
 	d.w.reset(w)
 	d.sync = false
 	d.err = nil
-	switch d.compressionLevel.chain {
-	case 0:
-		// level was NoCompression.
-		for i := range d.window {
-			d.window[i] = 0
-		}
+	switch d.compressionLevel.level {
+	case NoCompression:
 		d.windowEnd = 0
 	default:
 		d.chainHead = -1
 		for s := d.hashHead; len(s) > 0; {
-			n := copy(s, zeroes[:])
+			n := copy(s, hzeroes[:])
 			s = s[n:]
 		}
-		for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] {
-			copy(s, zeroes[:])
+		for s := d.hashPrev; len(s) > 0; s = s[len(hzeroes):] {
+			copy(s, hzeroes[:])
 		}
 		d.hashOffset = 1
 		d.index, d.windowEnd = 0, 0
-		for s := d.window; len(s) > 0; {
-			n := copy(s, bzeroes[:])
-			s = s[n:]
-		}
 		d.blockStart, d.byteAvailable = 0, false
-
-		d.tokens = d.tokens[:maxFlateBlockTokens+1]
-		for i := 0; i <= maxFlateBlockTokens; i++ {
-			d.tokens[i] = 0
-		}
 		d.tokens = d.tokens[:0]
 		d.length = minMatchLength - 1
 		d.offset = 0
 		d.hash = 0
@@ -509,28 +585,22 @@ func NewWriter(w io.Writer, level int) (*Writer, error) {
 // can only be decompressed by a Reader initialized with the
 // same dictionary.
 func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
-	dw := &dictWriter{w, false}
+	dw := &dictWriter{w}
 	zw, err := NewWriter(dw, level)
 	if err != nil {
 		return nil, err
 	}
-	zw.Write(dict)
-	zw.Flush()
-	dw.enabled = true
+	zw.d.fillWindow(dict)
 	zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
 	return zw, err
 }
 
 type dictWriter struct {
-	w       io.Writer
-	enabled bool
+	w io.Writer
 }
 
 func (w *dictWriter) Write(b []byte) (n int, err error) {
-	if w.enabled {
-		return w.w.Write(b)
-	}
-	return len(b), nil
+	return w.w.Write(b)
 }
 
 // A Writer takes data written to it and writes the compressed
@@ -572,10 +642,7 @@ func (w *Writer) Reset(dst io.Writer) {
 		// w was created with NewWriterDict
 		dw.w = dst
 		w.d.reset(dw)
-		dw.enabled = false
-		w.Write(w.dict)
-		w.Flush()
-		dw.enabled = true
+		w.d.fillWindow(w.dict)
 	} else {
 		// w was created with NewWriter
 		w.d.reset(dst)
diff --git a/src/compress/flate/deflate_test.go b/src/compress/flate/deflate_test.go
index c165ade734..6201213f01 100644
--- a/src/compress/flate/deflate_test.go
+++ b/src/compress/flate/deflate_test.go
@@ -80,6 +80,32 @@ func largeDataChunk() []byte {
 	return result
 }
 
+func TestBulkHash4(t *testing.T) {
+	for _, x := range deflateTests {
+		y := x.out
+		if len(y) < minMatchLength {
+			continue
+		}
+		y = append(y, y...)
+		for j := 4; j < len(y); j++ {
+			y := y[:j]
+			dst := make([]uint32, len(y)-minMatchLength+1)
+			for i := range dst {
+				dst[i] = uint32(i + 100)
+			}
+			bulkHash4(y, dst)
+			for i, got := range dst {
+				want := hash4(y[i:])
+				if got != want && got == uint32(i)+100 {
+					t.Errorf("Len:%d Index:%d, want 0x%08x but not modified", len(y), i, want)
+				} else if got != want {
+					t.Errorf("Len:%d Index:%d, got 0x%08x want:0x%08x", len(y), i, got, want)
+				}
+			}
+		}
+	}
+}
+
 func TestDeflate(t *testing.T) {
 	for _, h := range deflateTests {
 		var buf bytes.Buffer
@@ -91,7 +117,7 @@ func TestDeflate(t *testing.T) {
 		w.Write(h.in)
 		w.Close()
 		if !bytes.Equal(buf.Bytes(), h.out) {
-			t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out)
+			t.Errorf("Deflate(%d, %x) = \n%#v, want \n%#v", h.level, h.in, buf.Bytes(), h.out)
 		}
 	}
 }
@@ -289,6 +315,9 @@ func testToFromWithLevelAndLimit(t *testing.T, level int, input []byte, name str
 		t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
 		return
 	}
+	if limit > 0 {
+		t.Logf("level: %d, size:%.2f%%, %d b\n", level, float64(buffer.Len()*100)/float64(limit), buffer.Len())
+	}
 	r := NewReader(&buffer)
 	out, err := ioutil.ReadAll(r)
 	if err != nil {
@@ -457,6 +486,17 @@ func TestWriterReset(t *testing.T) {
 		// DeepEqual doesn't compare functions.
 		w.d.fill, wref.d.fill = nil, nil
 		w.d.step, wref.d.step = nil, nil
+		w.d.bulkHasher, wref.d.bulkHasher = nil, nil
+		// hashMatch is always overwritten when used.
+		copy(w.d.hashMatch[:], wref.d.hashMatch[:])
+		if len(w.d.tokens) != 0 {
+			t.Errorf("level %d Writer not reset after Reset. %d tokens were present", level, len(w.d.tokens))
+		}
+		// As long as the length is 0, we don't care about the content.
+		w.d.tokens = wref.d.tokens
+
+		// We don't care if there are values in the window, as long as d.index is 0.
+		w.d.window = wref.d.window
 		if !reflect.DeepEqual(w, wref) {
 			t.Errorf("level %d Writer not reset after Reset", level)
 		}
@@ -481,7 +521,7 @@ func testResetOutput(t *testing.T, newWriter func(w io.Writer) (*Writer, error))
 		w.Write(b)
 	}
 	w.Close()
-	out1 := buf.String()
+	out1 := buf.Bytes()
 
 	buf2 := new(bytes.Buffer)
 	w.Reset(buf2)
@@ -489,10 +529,23 @@ func testResetOutput(t *testing.T, newWriter func(w io.Writer) (*Writer, error))
 		w.Write(b)
 	}
 	w.Close()
-	out2 := buf2.String()
+	out2 := buf2.Bytes()
 
-	if out1 != out2 {
-		t.Errorf("got %q, expected %q", out2, out1)
+	if len(out1) != len(out2) {
+		t.Errorf("got %d, expected %d bytes", len(out2), len(out1))
+		return
+	}
+	if !bytes.Equal(out1, out2) {
+		mm := 0
+		for i, b := range out1[:len(out2)] {
+			if b != out2[i] {
+				t.Errorf("mismatch index %d: %#02x, expected %#02x", i, out2[i], b)
+			}
+			mm++
+			if mm == 10 {
+				t.Fatal("Stopping")
+			}
+		}
 	}
 	t.Logf("got %d bytes", len(out1))
}
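
A standalone sketch (not part of the change above): it copies the hash4 and bulkHash4 definitions from this diff into a self-contained program and checks the property the new TestBulkHash4 relies on, namely that the rolling update inside bulkHash4 yields the same value as a fresh hash4 call at every offset. The sample input is arbitrary.

package main

import "fmt"

const (
	hashBits       = 17 // same value as in the diff
	hashmul        = 0x1e35a7bd
	minMatchLength = 4
)

// hash4 hashes the first 4 bytes of b, exactly as in the diff.
func hash4(b []byte) uint32 {
	return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
}

// bulkHash4 computes hash4 at every offset of b. Instead of reloading
// four bytes per position, it shifts the oldest byte out and ORs the
// next byte in, so each step costs one shift, one OR and one multiply.
func bulkHash4(b []byte, dst []uint32) {
	if len(b) < minMatchLength {
		return
	}
	hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
	dst[0] = (hb * hashmul) >> (32 - hashBits)
	end := len(b) - minMatchLength + 1
	for i := 1; i < end; i++ {
		hb = (hb << 8) | uint32(b[i+3])
		dst[i] = (hb * hashmul) >> (32 - hashBits)
	}
}

func main() {
	data := []byte("the quick brown fox jumps over the lazy dog")
	dst := make([]uint32, len(data)-minMatchLength+1)
	bulkHash4(data, dst)
	for i, got := range dst {
		if want := hash4(data[i:]); got != want {
			fmt.Printf("mismatch at offset %d: got %#x want %#x\n", i, got, want)
			return
		}
	}
	fmt.Println("bulkHash4 matches hash4 at every offset")
}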
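
Another standalone sketch (not part of the change above): the matchLen helper introduced in this diff, exercised on a small worked input. It replaces findMatch's old byte-at-a-time comparison loop that started from n := 3.

package main

import "fmt"

// matchLen returns the number of matching bytes in a and b,
// up to 'max'. Both slices must be at least 'max' bytes long.
func matchLen(a, b []byte, max int) int {
	a = a[:max] // limit the comparison to max bytes
	for i, av := range a {
		if b[i] != av {
			return i
		}
	}
	return max
}

func main() {
	a := []byte("abcdefgh")
	b := []byte("abcdXfgh")
	fmt.Println(matchLen(a, b, 8)) // prints 4: first mismatch at index 4
}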
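
Finally, a usage sketch (not part of the change above) for the NewWriterDict path that this diff reroutes through fillWindow: the dictionary is now preloaded into the window directly instead of being pushed through a throwaway Write+Flush behind the dictWriter.enabled flag, but the public API and the stream format are unchanged, so caller code like this behaves identically. The dictionary and payload values here are arbitrary.

package main

import (
	"bytes"
	"compress/flate"
	"fmt"
	"io/ioutil"
	"log"
)

func main() {
	dict := []byte("hello world")
	data := []byte("hello world, hello world")

	var buf bytes.Buffer
	w, err := flate.NewWriterDict(&buf, flate.BestCompression, dict)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := w.Write(data); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("compressed %d bytes to %d\n", len(data), buf.Len())

	// The stream can only be decoded with the same dictionary.
	r := flate.NewReaderDict(&buf, dict)
	out, err := ioutil.ReadAll(r)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("round trip ok: %v\n", bytes.Equal(out, data))
}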