mirror of https://github.com/golang/go.git
image/jpeg: add DCT tests, do a small optimization (common sub-expression
elimination) in idct.go. benchmark old ns/op new ns/op delta BenchmarkIDCT 5649 5610 -0.69% BenchmarkDecodeRGBOpaque 2948607 2941051 -0.26% The "type block" declaration moved so that idct.go is compilable as a stand-alone file: "go tool 6g -S idct.go" works. R=r CC=golang-dev https://golang.org/cl/6619056
This commit is contained in:
parent
0b2ca9e62f
commit
12e343f372
|
|
@ -0,0 +1,297 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package jpeg
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkFDCT(b *testing.B) {
|
||||
b.StopTimer()
|
||||
blocks := make([]block, 0, b.N*len(testBlocks))
|
||||
for i := 0; i < b.N; i++ {
|
||||
blocks = append(blocks, testBlocks[:]...)
|
||||
}
|
||||
b.StartTimer()
|
||||
for i := range blocks {
|
||||
fdct(&blocks[i])
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkIDCT(b *testing.B) {
|
||||
b.StopTimer()
|
||||
dummy := make([]byte, 64)
|
||||
blocks := make([]block, 0, b.N*len(testBlocks))
|
||||
for i := 0; i < b.N; i++ {
|
||||
blocks = append(blocks, testBlocks[:]...)
|
||||
}
|
||||
b.StartTimer()
|
||||
for i := range blocks {
|
||||
idct(dummy, 8, &blocks[i])
|
||||
}
|
||||
}
|
||||
|
||||
func TestDCT(t *testing.T) {
|
||||
blocks := make([]block, len(testBlocks))
|
||||
copy(blocks, testBlocks[:])
|
||||
|
||||
// Append some randomly generated blocks of varying sparseness.
|
||||
r := rand.New(rand.NewSource(123))
|
||||
for i := 0; i < 100; i++ {
|
||||
b := block{}
|
||||
n := r.Int() % 64
|
||||
for j := 0; j < n; j++ {
|
||||
b[r.Int()%len(b)] = r.Int() % 256
|
||||
}
|
||||
blocks = append(blocks, b)
|
||||
}
|
||||
|
||||
// Check that the FDCT and IDCT functions are inverses, after a scale and
|
||||
// level shift. Scaling reduces the rounding errors in the conversion from
|
||||
// floats to ints.
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
for j := range got {
|
||||
got[j] = (got[j] - 128) * 8
|
||||
}
|
||||
slowFDCT(&got)
|
||||
slowIDCT(&got)
|
||||
for j := range got {
|
||||
got[j] = got[j]/8 + 128
|
||||
}
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: IDCT(FDCT)\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the optimized and slow FDCT implementations agree.
|
||||
// The fdct function already does a scale and level shift.
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
fdct(&got)
|
||||
for j := range want {
|
||||
want[j] = (want[j] - 128) * 8
|
||||
}
|
||||
slowFDCT(&want)
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: FDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the optimized and slow IDCT implementations agree.
|
||||
dummy := make([]byte, 64)
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
idct(dummy, 8, &got)
|
||||
slowIDCT(&want)
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: IDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// differ returns whether any pair-wise elements in b0 and b1 differ by 2 or
|
||||
// more. That tolerance is because there isn't a single definitive decoding of
|
||||
// a given JPEG image, even before the YCbCr to RGB conversion; implementations
|
||||
// can have different IDCT rounding errors.
|
||||
func differ(b0, b1 *block) bool {
|
||||
for i := range b0 {
|
||||
delta := b0[i] - b1[i]
|
||||
if delta < -2 || +2 < delta {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// alpha returns 1 if i is 0 and returns √2 otherwise.
|
||||
func alpha(i int) float64 {
|
||||
if i == 0 {
|
||||
return 1
|
||||
}
|
||||
return math.Sqrt2
|
||||
}
|
||||
|
||||
// slowFDCT performs the 8*8 2-dimensional forward discrete cosine transform:
|
||||
//
|
||||
// dst[u,v] = (1/8) * Σ_x Σ_y alpha(u) * alpha(v) * src[x,y] *
|
||||
// cos((π/2) * (2*x + 1) * u / 8) *
|
||||
// cos((π/2) * (2*y + 1) * v / 8)
|
||||
//
|
||||
// x and y are in pixel space, and u and v are in transform space.
|
||||
//
|
||||
// b acts as both dst and src.
|
||||
func slowFDCT(b *block) {
|
||||
var dst [blockSize]float64
|
||||
for v := 0; v < 8; v++ {
|
||||
for u := 0; u < 8; u++ {
|
||||
sum := 0.0
|
||||
for y := 0; y < 8; y++ {
|
||||
for x := 0; x < 8; x++ {
|
||||
sum += alpha(u) * alpha(v) * float64(b[8*y+x]) *
|
||||
math.Cos(math.Pi*float64((2*x+1)*u)/16) *
|
||||
math.Cos(math.Pi*float64((2*y+1)*v)/16)
|
||||
}
|
||||
}
|
||||
dst[8*v+u] = sum / 8
|
||||
}
|
||||
}
|
||||
// Convert from float64 to int.
|
||||
for i := range dst {
|
||||
b[i] = int(dst[i] + 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
// slowIDCT performs the 8*8 2-dimensional inverse discrete cosine transform:
|
||||
//
|
||||
// dst[x,y] = (1/8) * Σ_u Σ_v alpha(u) * alpha(v) * src[u,v] *
|
||||
// cos((π/2) * (2*x + 1) * u / 8) *
|
||||
// cos((π/2) * (2*y + 1) * v / 8)
|
||||
//
|
||||
// x and y are in pixel space, and u and v are in transform space.
|
||||
//
|
||||
// b acts as both dst and src.
|
||||
func slowIDCT(b *block) {
|
||||
var dst [blockSize]float64
|
||||
for y := 0; y < 8; y++ {
|
||||
for x := 0; x < 8; x++ {
|
||||
sum := 0.0
|
||||
for v := 0; v < 8; v++ {
|
||||
for u := 0; u < 8; u++ {
|
||||
sum += alpha(u) * alpha(v) * float64(b[8*v+u]) *
|
||||
math.Cos(math.Pi*float64((2*x+1)*u)/16) *
|
||||
math.Cos(math.Pi*float64((2*y+1)*v)/16)
|
||||
}
|
||||
}
|
||||
dst[8*y+x] = sum / 8
|
||||
}
|
||||
}
|
||||
// Convert from float64 to int.
|
||||
for i := range dst {
|
||||
b[i] = int(dst[i] + 0.5)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *block) String() string {
|
||||
s := bytes.NewBuffer(nil)
|
||||
fmt.Fprintf(s, "{\n")
|
||||
for y := 0; y < 8; y++ {
|
||||
fmt.Fprintf(s, "\t")
|
||||
for x := 0; x < 8; x++ {
|
||||
fmt.Fprintf(s, "0x%04x, ", uint16(b[8*y+x]))
|
||||
}
|
||||
fmt.Fprintln(s)
|
||||
}
|
||||
fmt.Fprintf(s, "}")
|
||||
return s.String()
|
||||
}
|
||||
|
||||
// testBlocks are the first 10 pre-IDCT blocks from ../testdata/video-001.jpeg.
|
||||
var testBlocks = [10]block{
|
||||
{
|
||||
0x7f, 0xf6, 0x01, 0x07, 0xff, 0x00, 0x00, 0x00,
|
||||
0xf5, 0x01, 0xfa, 0x01, 0xfe, 0x00, 0x01, 0x00,
|
||||
0x05, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0xff, 0xf8, 0x00, 0x01, 0xff, 0x00, 0x00,
|
||||
0x00, 0x01, 0x00, 0x01, 0x00, 0xff, 0xff, 0x00,
|
||||
0xff, 0x0c, 0x00, 0x00, 0x00, 0x00, 0xff, 0x01,
|
||||
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x01, 0xff, 0x01, 0x00, 0xfe,
|
||||
},
|
||||
{
|
||||
0x29, 0x07, 0x00, 0xfc, 0x01, 0x01, 0x00, 0x00,
|
||||
0x07, 0x00, 0x03, 0x00, 0x01, 0x00, 0xff, 0xff,
|
||||
0xff, 0xfd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0xff, 0x01, 0x00, 0x00,
|
||||
0x01, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0xfa, 0x01, 0x00, 0x01, 0x00, 0x01, 0xff,
|
||||
0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0xff, 0x00, 0xff, 0x00, 0x02,
|
||||
},
|
||||
{
|
||||
0xc5, 0xfa, 0x01, 0x00, 0x00, 0x01, 0x00, 0xff,
|
||||
0x02, 0xff, 0x01, 0x00, 0x01, 0x00, 0xff, 0x00,
|
||||
0xff, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
|
||||
0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
},
|
||||
{
|
||||
0x86, 0x05, 0x00, 0x02, 0x00, 0x00, 0x01, 0x00,
|
||||
0xf2, 0x06, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00,
|
||||
0xf6, 0xfa, 0xf9, 0x00, 0xff, 0x01, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
|
||||
0xff, 0x00, 0x00, 0x01, 0x00, 0xff, 0x01, 0x00,
|
||||
0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x01,
|
||||
0x00, 0x01, 0xff, 0x01, 0x00, 0xff, 0x00, 0x00,
|
||||
},
|
||||
{
|
||||
0x24, 0xfe, 0x00, 0xff, 0x00, 0xff, 0xff, 0x00,
|
||||
0x08, 0xfd, 0x00, 0x01, 0x01, 0x00, 0x01, 0x00,
|
||||
0x06, 0x03, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01,
|
||||
0x01, 0x00, 0x01, 0xff, 0x00, 0x01, 0x00, 0x00,
|
||||
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0xff, 0x01,
|
||||
},
|
||||
{
|
||||
0xcd, 0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
|
||||
0x03, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
|
||||
0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xff,
|
||||
},
|
||||
{
|
||||
0x81, 0xfe, 0x05, 0xff, 0x01, 0xff, 0x01, 0x00,
|
||||
0xef, 0xf9, 0x00, 0xf9, 0x00, 0xff, 0x00, 0xff,
|
||||
0x05, 0xf9, 0x00, 0xf8, 0x01, 0xff, 0x01, 0xff,
|
||||
0x00, 0xff, 0x07, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x01,
|
||||
0xff, 0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00,
|
||||
0x01, 0x01, 0x00, 0xff, 0x00, 0x00, 0x00, 0xff,
|
||||
},
|
||||
{
|
||||
0x28, 0x00, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x02, 0x01, 0x03, 0x00, 0xff, 0x00, 0x01,
|
||||
0xfe, 0x02, 0x01, 0x03, 0xff, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0xfd, 0x00, 0x01, 0x00, 0xff, 0x00,
|
||||
0x01, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0xff, 0x01, 0x01, 0x00, 0xff,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0xff, 0x00, 0x01,
|
||||
},
|
||||
{
|
||||
0xdf, 0xf9, 0xfe, 0x00, 0x03, 0x01, 0xff, 0xff,
|
||||
0x04, 0x01, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00,
|
||||
0xff, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x01,
|
||||
0x00, 0x00, 0xfe, 0x01, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0xff, 0x01, 0x00, 0x00, 0x00, 0x01,
|
||||
0xff, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
|
||||
0x00, 0xff, 0x00, 0xff, 0x01, 0x00, 0x00, 0x01,
|
||||
0xff, 0xff, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
|
||||
},
|
||||
{
|
||||
0x88, 0xfd, 0x00, 0x00, 0xff, 0x00, 0x01, 0xff,
|
||||
0xe1, 0x06, 0x06, 0x01, 0xff, 0x00, 0x01, 0x00,
|
||||
0x08, 0x00, 0xfa, 0x00, 0xff, 0xff, 0xff, 0xff,
|
||||
0x08, 0x01, 0x00, 0xff, 0x01, 0xff, 0x00, 0x00,
|
||||
0xf5, 0xff, 0x00, 0x01, 0xff, 0x01, 0x01, 0x00,
|
||||
0xff, 0xff, 0x01, 0xff, 0x01, 0x00, 0x01, 0x00,
|
||||
0x00, 0x01, 0x01, 0xff, 0x00, 0xff, 0x00, 0x01,
|
||||
0x02, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x00,
|
||||
},
|
||||
}
|
||||
|
|
@ -37,6 +37,10 @@ package jpeg
|
|||
*
|
||||
*/
|
||||
|
||||
const blockSize = 64 // A DCT block is 8x8.
|
||||
|
||||
type block [blockSize]int
|
||||
|
||||
const (
|
||||
w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
|
||||
w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
|
||||
|
|
@ -70,30 +74,31 @@ const (
|
|||
func idct(dst []byte, stride int, src *block) {
|
||||
// Horizontal 1-D IDCT.
|
||||
for y := 0; y < 8; y++ {
|
||||
y8 := y * 8
|
||||
// If all the AC components are zero, then the IDCT is trivial.
|
||||
if src[y*8+1] == 0 && src[y*8+2] == 0 && src[y*8+3] == 0 &&
|
||||
src[y*8+4] == 0 && src[y*8+5] == 0 && src[y*8+6] == 0 && src[y*8+7] == 0 {
|
||||
dc := src[y*8+0] << 3
|
||||
src[y*8+0] = dc
|
||||
src[y*8+1] = dc
|
||||
src[y*8+2] = dc
|
||||
src[y*8+3] = dc
|
||||
src[y*8+4] = dc
|
||||
src[y*8+5] = dc
|
||||
src[y*8+6] = dc
|
||||
src[y*8+7] = dc
|
||||
if src[y8+1] == 0 && src[y8+2] == 0 && src[y8+3] == 0 &&
|
||||
src[y8+4] == 0 && src[y8+5] == 0 && src[y8+6] == 0 && src[y8+7] == 0 {
|
||||
dc := src[y8+0] << 3
|
||||
src[y8+0] = dc
|
||||
src[y8+1] = dc
|
||||
src[y8+2] = dc
|
||||
src[y8+3] = dc
|
||||
src[y8+4] = dc
|
||||
src[y8+5] = dc
|
||||
src[y8+6] = dc
|
||||
src[y8+7] = dc
|
||||
continue
|
||||
}
|
||||
|
||||
// Prescale.
|
||||
x0 := (src[y*8+0] << 11) + 128
|
||||
x1 := src[y*8+4] << 11
|
||||
x2 := src[y*8+6]
|
||||
x3 := src[y*8+2]
|
||||
x4 := src[y*8+1]
|
||||
x5 := src[y*8+7]
|
||||
x6 := src[y*8+5]
|
||||
x7 := src[y*8+3]
|
||||
x0 := (src[y8+0] << 11) + 128
|
||||
x1 := src[y8+4] << 11
|
||||
x2 := src[y8+6]
|
||||
x3 := src[y8+2]
|
||||
x4 := src[y8+1]
|
||||
x5 := src[y8+7]
|
||||
x6 := src[y8+5]
|
||||
x7 := src[y8+3]
|
||||
|
||||
// Stage 1.
|
||||
x8 := w7 * (x4 + x5)
|
||||
|
|
@ -123,14 +128,14 @@ func idct(dst []byte, stride int, src *block) {
|
|||
x4 = (r2*(x4-x5) + 128) >> 8
|
||||
|
||||
// Stage 4.
|
||||
src[8*y+0] = (x7 + x1) >> 8
|
||||
src[8*y+1] = (x3 + x2) >> 8
|
||||
src[8*y+2] = (x0 + x4) >> 8
|
||||
src[8*y+3] = (x8 + x6) >> 8
|
||||
src[8*y+4] = (x8 - x6) >> 8
|
||||
src[8*y+5] = (x0 - x4) >> 8
|
||||
src[8*y+6] = (x3 - x2) >> 8
|
||||
src[8*y+7] = (x7 - x1) >> 8
|
||||
src[y8+0] = (x7 + x1) >> 8
|
||||
src[y8+1] = (x3 + x2) >> 8
|
||||
src[y8+2] = (x0 + x4) >> 8
|
||||
src[y8+3] = (x8 + x6) >> 8
|
||||
src[y8+4] = (x8 - x6) >> 8
|
||||
src[y8+5] = (x0 - x4) >> 8
|
||||
src[y8+6] = (x3 - x2) >> 8
|
||||
src[y8+7] = (x7 - x1) >> 8
|
||||
}
|
||||
|
||||
// Vertical 1-D IDCT.
|
||||
|
|
@ -189,8 +194,10 @@ func idct(dst []byte, stride int, src *block) {
|
|||
|
||||
// Level shift by +128, clip to [0, 255], and write to dst.
|
||||
for y := 0; y < 8; y++ {
|
||||
y8 := y * 8
|
||||
yStride := y * stride
|
||||
for x := 0; x < 8; x++ {
|
||||
c := src[y*8+x]
|
||||
c := src[y8+x]
|
||||
if c < -128 {
|
||||
c = 0
|
||||
} else if c > 127 {
|
||||
|
|
@ -198,7 +205,7 @@ func idct(dst []byte, stride int, src *block) {
|
|||
} else {
|
||||
c += 128
|
||||
}
|
||||
dst[y*stride+x] = uint8(c)
|
||||
dst[yStride+x] = uint8(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,11 +35,7 @@ type component struct {
|
|||
tq uint8 // Quantization table destination selector.
|
||||
}
|
||||
|
||||
type block [blockSize]int
|
||||
|
||||
const (
|
||||
blockSize = 64 // A DCT block is 8x8.
|
||||
|
||||
dcTable = 0
|
||||
acTable = 1
|
||||
maxTc = 1
|
||||
|
|
|
|||
|
|
@ -171,6 +171,23 @@ func TestWriter(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func BenchmarkDecodeRGBOpaque(b *testing.B) {
|
||||
b.StopTimer()
|
||||
data, err := ioutil.ReadFile("../testdata/video-001.jpeg")
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
cfg, err := DecodeConfig(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
b.SetBytes(int64(cfg.Width * cfg.Height * 4))
|
||||
b.StartTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Decode(bytes.NewReader(data))
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkEncodeRGBOpaque(b *testing.B) {
|
||||
b.StopTimer()
|
||||
img := image.NewRGBA(image.Rect(0, 0, 640, 480))
|
||||
|
|
|
|||
Loading…
Reference in New Issue