mirror of https://github.com/golang/go.git
compress/testdata: change {e,pi}.txt from 10k to 100k digits.
These files change from exactly 10003 bytes long to 100003: a digit, a '.', 100k digits, and a '\n'. The magic constants in compress/flate/deflate_test.go change since deflateInflateStringTests checks that the compressed form of e.txt is not 'too large'. I'm not exactly sure how these numbers were originally calculated (they were introduced in codereview 5554066 "make lazy matching work"); perhaps krasin@golang.org can comment. My change was to increase the first one (no compression) to a tight bound, and multiply all the others by 10. Benchcmp numbers for compress/flate and compress/lzw below. LZW's window size of 4096 is less than 10k, so shows no significant change. Flate's window size is 32768, between 10k and 100k, and so the .*1e5 and .*1e6 benchmarks show a dramatic drop, since the compressed forms are no longer a trivial forward copy of 10k digits repeated over and over, but should now be more representative of real world usage. compress/flate: benchmark old MB/s new MB/s speedup BenchmarkDecodeDigitsSpeed1e4 16.58 16.52 1.00x BenchmarkDecodeDigitsSpeed1e5 68.09 18.10 0.27x BenchmarkDecodeDigitsSpeed1e6 124.63 18.35 0.15x BenchmarkDecodeDigitsDefault1e4 17.21 17.12 0.99x BenchmarkDecodeDigitsDefault1e5 118.28 19.19 0.16x BenchmarkDecodeDigitsDefault1e6 295.62 20.52 0.07x BenchmarkDecodeDigitsCompress1e4 17.22 17.17 1.00x BenchmarkDecodeDigitsCompress1e5 118.19 19.21 0.16x BenchmarkDecodeDigitsCompress1e6 295.59 20.55 0.07x BenchmarkEncodeDigitsSpeed1e4 8.18 8.19 1.00x BenchmarkEncodeDigitsSpeed1e5 43.22 12.84 0.30x BenchmarkEncodeDigitsSpeed1e6 80.76 13.48 0.17x BenchmarkEncodeDigitsDefault1e4 6.29 6.19 0.98x BenchmarkEncodeDigitsDefault1e5 31.63 3.60 0.11x BenchmarkEncodeDigitsDefault1e6 52.97 3.24 0.06x BenchmarkEncodeDigitsCompress1e4 6.20 6.19 1.00x BenchmarkEncodeDigitsCompress1e5 31.59 3.59 0.11x BenchmarkEncodeDigitsCompress1e6 53.18 3.25 0.06x compress/lzw: benchmark old MB/s new MB/s speedup BenchmarkDecoder1e4 21.99 22.09 1.00x BenchmarkDecoder1e5 
22.77 22.71 1.00x BenchmarkDecoder1e6 22.90 22.90 1.00x BenchmarkEncoder1e4 21.04 21.19 1.01x BenchmarkEncoder1e5 22.06 22.06 1.00x BenchmarkEncoder1e6 22.16 22.28 1.01x R=rsc CC=golang-dev, krasin https://golang.org/cl/6207043
This commit is contained in:
parent
ffd0d02d09
commit
738e77aa4f
|
|
@ -290,7 +290,7 @@ var deflateInflateStringTests = []deflateInflateStringTest{
|
|||
{
|
||||
"../testdata/e.txt",
|
||||
"2.718281828...",
|
||||
[...]int{10013, 5065, 5096, 5115, 5093, 5079, 5079, 5079, 5079, 5079},
|
||||
[...]int{100018, 50650, 50960, 51150, 50930, 50790, 50790, 50790, 50790, 50790},
|
||||
},
|
||||
{
|
||||
"../testdata/Mark.Twain-Tom.Sawyer.txt",
|
||||
|
|
|
|||
|
|
@ -21,14 +21,6 @@ var testfiles = []string{
|
|||
// Digits is the digits of the irrational number e. Its decimal representation
|
||||
// does not repeat, but there are only 10 possible digits, so it should be
|
||||
// reasonably compressible.
|
||||
//
|
||||
// TODO(nigeltao): e.txt is only 10K long, so when benchmarking 100K or 1000K
|
||||
// of input, the digits are just repeated from the beginning, and flate can
|
||||
// trivially compress this as a length/distance copy operation. Thus,
|
||||
// BenchmarkDecodeDigitsXxx1e6 is essentially just measuring the speed of the
|
||||
// forwardCopy implementation, but isn't particularly representative of real
|
||||
// usage. The TODO is to replace e.txt with 100K digits, not just 10K digits,
|
||||
// since that's larger than the windowSize 1<<15 (= 32768).
|
||||
digits: "../testdata/e.txt",
|
||||
// Twain is Project Gutenberg's edition of Mark Twain's classic English novel.
|
||||
twain: "../testdata/Mark.Twain-Tom.Sawyer.txt",
|
||||
|
|
|
|||
|
|
@ -114,11 +114,19 @@ func TestReader(t *testing.T) {
|
|||
func benchmarkDecoder(b *testing.B, n int) {
|
||||
b.StopTimer()
|
||||
b.SetBytes(int64(n))
|
||||
buf0, _ := ioutil.ReadFile("../testdata/e.txt")
|
||||
buf0 = buf0[:10000]
|
||||
buf0, err := ioutil.ReadFile("../testdata/e.txt")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if len(buf0) == 0 {
|
||||
b.Fatalf("test file has no data")
|
||||
}
|
||||
compressed := new(bytes.Buffer)
|
||||
w := NewWriter(compressed, LSB, 8)
|
||||
for i := 0; i < n; i += len(buf0) {
|
||||
if len(buf0) > n-i {
|
||||
buf0 = buf0[:n-i]
|
||||
}
|
||||
io.Copy(w, bytes.NewBuffer(buf0))
|
||||
}
|
||||
w.Close()
|
||||
|
|
|
|||
|
|
@ -99,10 +99,18 @@ func TestWriter(t *testing.T) {
|
|||
func benchmarkEncoder(b *testing.B, n int) {
|
||||
b.StopTimer()
|
||||
b.SetBytes(int64(n))
|
||||
buf0, _ := ioutil.ReadFile("../testdata/e.txt")
|
||||
buf0 = buf0[:10000]
|
||||
buf0, err := ioutil.ReadFile("../testdata/e.txt")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
if len(buf0) == 0 {
|
||||
b.Fatalf("test file has no data")
|
||||
}
|
||||
buf1 := make([]byte, n)
|
||||
for i := 0; i < n; i += len(buf0) {
|
||||
if len(buf0) > n-i {
|
||||
buf0 = buf0[:n-i]
|
||||
}
|
||||
copy(buf1[i:], buf0)
|
||||
}
|
||||
buf0 = nil
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue