From 2eaa5a0703167635287457ec562a6005e3397dbf Mon Sep 17 00:00:00 2001 From: David Chase Date: Thu, 5 Jun 2025 15:09:19 -0400 Subject: [PATCH] [dev.simd] simd: add functions+methods to load-from/store-to slices Includes the generator (which is short and uncomplicated) and a few tests. Change-Id: Icba9de042935a59bee34b278306c241b7651f5b4 Reviewed-on: https://go-review.googlesource.com/c/go/+/679258 Auto-Submit: David Chase LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/go/doc/comment/std_test.go | 5 - src/simd/cpu.go | 2 +- src/simd/genslice.go | 117 +++++++++++++ src/simd/no_tag.go | 9 + src/simd/simd_test.go | 63 +++++++ src/simd/slice_amd64.go | 308 +++++++++++++++++++++++++++++++++ 6 files changed, 498 insertions(+), 6 deletions(-) create mode 100644 src/simd/genslice.go create mode 100644 src/simd/no_tag.go create mode 100644 src/simd/slice_amd64.go diff --git a/src/go/doc/comment/std_test.go b/src/go/doc/comment/std_test.go index 9a40d1d09a..bd0379856a 100644 --- a/src/go/doc/comment/std_test.go +++ b/src/go/doc/comment/std_test.go @@ -5,7 +5,6 @@ package comment import ( - "internal/buildcfg" "internal/diff" "internal/testenv" "slices" @@ -25,10 +24,6 @@ func TestStd(t *testing.T) { list = append(list, pkg) } } - // TODO remove this when simd is the default, for now fake its existence - if !buildcfg.Experiment.SIMD { - list = append(list, "simd") - } slices.Sort(list) have := strings.Join(stdPkgs, "\n") + "\n" diff --git a/src/simd/cpu.go b/src/simd/cpu.go index 84bf03cfb0..52a5614e68 100644 --- a/src/simd/cpu.go +++ b/src/simd/cpu.go @@ -4,7 +4,7 @@ //go:build goexperiment.simd -// the build condition == if the experiment is not on, cmd/api TestCheck will see this and complain +// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain // see also go/doc/comment, where "simd" is inserted to the package list of the experiment is not on. package simd diff --git a/src/simd/genslice.go b/src/simd/genslice.go new file mode 100644 index 0000000000..77b9b41c09 --- /dev/null +++ b/src/simd/genslice.go @@ -0,0 +1,117 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +package main + +// this generates all the code to load and store simd +// vectors to/from slices. + +import ( + "bytes" + "flag" + "fmt" + "go/format" + "io" + "os" + "strings" +) + +// //go:noescape +// func LoadUint8x16Slice(s []uint8) Uint8x16 { +// return LoadUint8x16((*[16]uint8)(s[:16])) +// } + +// //go:noescape +// func (x Uint8x16) StoreSlice(s []uint8) { +// x.Store((*[16]uint8)(s[:16])) +// } + +func slice(e string, w, c int, out io.Writer) { + b := w * c + if b < 128 || b > 512 { + return + } + E := strings.ToUpper(e[:1]) + e[1:] + t := fmt.Sprintf("%s%d", e, w) + v := fmt.Sprintf("%s%dx%d", E, w, c) + a := "a" + if strings.Contains("aeiou", e[:1]) { + a = "an" + } + fmt.Fprintf(out, + ` +// Load%sSlice loads %s %s from a slice of at least %d %ss +func Load%sSlice(s []%s) %s { + return Load%s((*[%d]%s)(s)) +} +`, v, a, v, c, t, v, t, v, v, c, t) + + fmt.Fprintf(out, + ` +// StoreSlice stores x into a slice of at least %d %ss +func (x %s) StoreSlice(s []%s) { + x.Store((*[%d]%s)(s)) +} +`, c, t, v, t, c, t) + +} + +func prologue(s string, out io.Writer) { + fmt.Fprintf(out, + `// Code generated by '%s'; DO NOT EDIT. + +//go:build goexperiment.simd + +// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain +// see also go/doc/comment, where "simd" is inserted to the package list of the experiment is not on. + +package simd + +`, s) +} + +func main() { + filename := flag.String("o", "", "write generated code to this file") + flag.Parse() + + ofile := os.Stdout + + if *filename != "" { + var err error + ofile, err = os.Create(*filename) + if err != nil { + fmt.Fprintf(os.Stderr, "Could not create the output file for the generated code, %v", err) + os.Exit(1) + } + } + + out := new(bytes.Buffer) + + prologue("go run genslice.go -o slice_amd64.go", out) + + vecs := []int{128, 256, 512} + ints := []int{8, 16, 32, 64} + floats := []int{32, 64} + for _, v := range vecs { + for _, w := range ints { + c := v / w + slice("int", w, c, out) + slice("uint", w, c, out) + } + for _, w := range floats { + c := v / w + slice("float", w, c, out) + } + } + b, err := format.Source(out.Bytes()) + if err != nil { + fmt.Fprintf(os.Stderr, "There was a problem formatting the generated code, %v", err) + os.Exit(1) + } else { + ofile.Write(b) + ofile.Close() + } +} diff --git a/src/simd/no_tag.go b/src/simd/no_tag.go new file mode 100644 index 0000000000..c11fd51b23 --- /dev/null +++ b/src/simd/no_tag.go @@ -0,0 +1,9 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package simd + +// This file has no build tag, so that go generate can run without a build tag. + +//go:generate go run genslice.go -o slice_amd64.go diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index e611092c43..37e07c96d7 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -163,3 +163,66 @@ func TestSub(t *testing.T) { } } } + +// checkInt8Slices ensures that b and a are equal, to the end of b. +// also serves to use the slices, to prevent accidental optimization. +func checkInt8Slices(t *testing.T, a, b []int8) { + for i := range b { + if a[i] != b[i] { + t.Errorf("a and b differ at index %d, a=%d, b=%d", i, a[i], b[i]) + } + } +} + +func TestSlicesInt8(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 32, 32) + v.StoreSlice(b) + checkInt8Slices(t, a, b) +} + +func TestSlicesInt8TooShortLoad(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Logf("Saw EXPECTED panic %v", r) + } else { + t.Errorf("Did not see expected panic") + } + }() + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 32, 32) + v.StoreSlice(b) + checkInt8Slices(t, a, b) +} + +func TestSlicesInt8TooShortStore(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Logf("Saw EXPECTED panic %v", r) + } else { + t.Errorf("Did not see expected panic") + } + }() + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 31) // TOO SHORT, should panic + v.StoreSlice(b) + checkInt8Slices(t, a, b) +} + +func TestSlicesFloat64(t *testing.T) { + a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine + v := simd.LoadFloat64x4Slice(a) + b := make([]float64, 4, 4) + v.StoreSlice(b) + for i := range b { + if a[i] != b[i] { + t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i]) + } + } +} diff --git a/src/simd/slice_amd64.go b/src/simd/slice_amd64.go new file mode 100644 index 0000000000..10050e6b9f --- /dev/null +++ b/src/simd/slice_amd64.go @@ -0,0 +1,308 @@ +// Code generated by 'go run genslice.go -o slice_amd64.go'; DO NOT EDIT. + +//go:build goexperiment.simd + +// The build condition == if the experiment is not on, cmd/api TestCheck will see this and complain +// see also go/doc/comment, where "simd" is inserted to the package list of the experiment is not on. + +package simd + +// LoadInt8x16Slice loads an Int8x16 from a slice of at least 16 int8s +func LoadInt8x16Slice(s []int8) Int8x16 { + return LoadInt8x16((*[16]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int8s +func (x Int8x16) StoreSlice(s []int8) { + x.Store((*[16]int8)(s)) +} + +// LoadUint8x16Slice loads an Uint8x16 from a slice of at least 16 uint8s +func LoadUint8x16Slice(s []uint8) Uint8x16 { + return LoadUint8x16((*[16]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint8s +func (x Uint8x16) StoreSlice(s []uint8) { + x.Store((*[16]uint8)(s)) +} + +// LoadInt16x8Slice loads an Int16x8 from a slice of at least 8 int16s +func LoadInt16x8Slice(s []int16) Int16x8 { + return LoadInt16x8((*[8]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int16s +func (x Int16x8) StoreSlice(s []int16) { + x.Store((*[8]int16)(s)) +} + +// LoadUint16x8Slice loads an Uint16x8 from a slice of at least 8 uint16s +func LoadUint16x8Slice(s []uint16) Uint16x8 { + return LoadUint16x8((*[8]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint16s +func (x Uint16x8) StoreSlice(s []uint16) { + x.Store((*[8]uint16)(s)) +} + +// LoadInt32x4Slice loads an Int32x4 from a slice of at least 4 int32s +func LoadInt32x4Slice(s []int32) Int32x4 { + return LoadInt32x4((*[4]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 int32s +func (x Int32x4) StoreSlice(s []int32) { + x.Store((*[4]int32)(s)) +} + +// LoadUint32x4Slice loads an Uint32x4 from a slice of at least 4 uint32s +func LoadUint32x4Slice(s []uint32) Uint32x4 { + return LoadUint32x4((*[4]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 uint32s +func (x Uint32x4) StoreSlice(s []uint32) { + x.Store((*[4]uint32)(s)) +} + +// LoadInt64x2Slice loads an Int64x2 from a slice of at least 2 int64s +func LoadInt64x2Slice(s []int64) Int64x2 { + return LoadInt64x2((*[2]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 int64s +func (x Int64x2) StoreSlice(s []int64) { + x.Store((*[2]int64)(s)) +} + +// LoadUint64x2Slice loads an Uint64x2 from a slice of at least 2 uint64s +func LoadUint64x2Slice(s []uint64) Uint64x2 { + return LoadUint64x2((*[2]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 uint64s +func (x Uint64x2) StoreSlice(s []uint64) { + x.Store((*[2]uint64)(s)) +} + +// LoadFloat32x4Slice loads a Float32x4 from a slice of at least 4 float32s +func LoadFloat32x4Slice(s []float32) Float32x4 { + return LoadFloat32x4((*[4]float32)(s)) +} + +// StoreSlice stores x into a slice of at least 4 float32s +func (x Float32x4) StoreSlice(s []float32) { + x.Store((*[4]float32)(s)) +} + +// LoadFloat64x2Slice loads a Float64x2 from a slice of at least 2 float64s +func LoadFloat64x2Slice(s []float64) Float64x2 { + return LoadFloat64x2((*[2]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 2 float64s +func (x Float64x2) StoreSlice(s []float64) { + x.Store((*[2]float64)(s)) +} + +// LoadInt8x32Slice loads an Int8x32 from a slice of at least 32 int8s +func LoadInt8x32Slice(s []int8) Int8x32 { + return LoadInt8x32((*[32]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 32 int8s +func (x Int8x32) StoreSlice(s []int8) { + x.Store((*[32]int8)(s)) +} + +// LoadUint8x32Slice loads an Uint8x32 from a slice of at least 32 uint8s +func LoadUint8x32Slice(s []uint8) Uint8x32 { + return LoadUint8x32((*[32]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 32 uint8s +func (x Uint8x32) StoreSlice(s []uint8) { + x.Store((*[32]uint8)(s)) +} + +// LoadInt16x16Slice loads an Int16x16 from a slice of at least 16 int16s +func LoadInt16x16Slice(s []int16) Int16x16 { + return LoadInt16x16((*[16]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int16s +func (x Int16x16) StoreSlice(s []int16) { + x.Store((*[16]int16)(s)) +} + +// LoadUint16x16Slice loads an Uint16x16 from a slice of at least 16 uint16s +func LoadUint16x16Slice(s []uint16) Uint16x16 { + return LoadUint16x16((*[16]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint16s +func (x Uint16x16) StoreSlice(s []uint16) { + x.Store((*[16]uint16)(s)) +} + +// LoadInt32x8Slice loads an Int32x8 from a slice of at least 8 int32s +func LoadInt32x8Slice(s []int32) Int32x8 { + return LoadInt32x8((*[8]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int32s +func (x Int32x8) StoreSlice(s []int32) { + x.Store((*[8]int32)(s)) +} + +// LoadUint32x8Slice loads an Uint32x8 from a slice of at least 8 uint32s +func LoadUint32x8Slice(s []uint32) Uint32x8 { + return LoadUint32x8((*[8]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint32s +func (x Uint32x8) StoreSlice(s []uint32) { + x.Store((*[8]uint32)(s)) +} + +// LoadInt64x4Slice loads an Int64x4 from a slice of at least 4 int64s +func LoadInt64x4Slice(s []int64) Int64x4 { + return LoadInt64x4((*[4]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 int64s +func (x Int64x4) StoreSlice(s []int64) { + x.Store((*[4]int64)(s)) +} + +// LoadUint64x4Slice loads an Uint64x4 from a slice of at least 4 uint64s +func LoadUint64x4Slice(s []uint64) Uint64x4 { + return LoadUint64x4((*[4]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 uint64s +func (x Uint64x4) StoreSlice(s []uint64) { + x.Store((*[4]uint64)(s)) +} + +// LoadFloat32x8Slice loads a Float32x8 from a slice of at least 8 float32s +func LoadFloat32x8Slice(s []float32) Float32x8 { + return LoadFloat32x8((*[8]float32)(s)) +} + +// StoreSlice stores x into a slice of at least 8 float32s +func (x Float32x8) StoreSlice(s []float32) { + x.Store((*[8]float32)(s)) +} + +// LoadFloat64x4Slice loads a Float64x4 from a slice of at least 4 float64s +func LoadFloat64x4Slice(s []float64) Float64x4 { + return LoadFloat64x4((*[4]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 4 float64s +func (x Float64x4) StoreSlice(s []float64) { + x.Store((*[4]float64)(s)) +} + +// LoadInt8x64Slice loads an Int8x64 from a slice of at least 64 int8s +func LoadInt8x64Slice(s []int8) Int8x64 { + return LoadInt8x64((*[64]int8)(s)) +} + +// StoreSlice stores x into a slice of at least 64 int8s +func (x Int8x64) StoreSlice(s []int8) { + x.Store((*[64]int8)(s)) +} + +// LoadUint8x64Slice loads an Uint8x64 from a slice of at least 64 uint8s +func LoadUint8x64Slice(s []uint8) Uint8x64 { + return LoadUint8x64((*[64]uint8)(s)) +} + +// StoreSlice stores x into a slice of at least 64 uint8s +func (x Uint8x64) StoreSlice(s []uint8) { + x.Store((*[64]uint8)(s)) +} + +// LoadInt16x32Slice loads an Int16x32 from a slice of at least 32 int16s +func LoadInt16x32Slice(s []int16) Int16x32 { + return LoadInt16x32((*[32]int16)(s)) +} + +// StoreSlice stores x into a slice of at least 32 int16s +func (x Int16x32) StoreSlice(s []int16) { + x.Store((*[32]int16)(s)) +} + +// LoadUint16x32Slice loads an Uint16x32 from a slice of at least 32 uint16s +func LoadUint16x32Slice(s []uint16) Uint16x32 { + return LoadUint16x32((*[32]uint16)(s)) +} + +// StoreSlice stores x into a slice of at least 32 uint16s +func (x Uint16x32) StoreSlice(s []uint16) { + x.Store((*[32]uint16)(s)) +} + +// LoadInt32x16Slice loads an Int32x16 from a slice of at least 16 int32s +func LoadInt32x16Slice(s []int32) Int32x16 { + return LoadInt32x16((*[16]int32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 int32s +func (x Int32x16) StoreSlice(s []int32) { + x.Store((*[16]int32)(s)) +} + +// LoadUint32x16Slice loads an Uint32x16 from a slice of at least 16 uint32s +func LoadUint32x16Slice(s []uint32) Uint32x16 { + return LoadUint32x16((*[16]uint32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 uint32s +func (x Uint32x16) StoreSlice(s []uint32) { + x.Store((*[16]uint32)(s)) +} + +// LoadInt64x8Slice loads an Int64x8 from a slice of at least 8 int64s +func LoadInt64x8Slice(s []int64) Int64x8 { + return LoadInt64x8((*[8]int64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 int64s +func (x Int64x8) StoreSlice(s []int64) { + x.Store((*[8]int64)(s)) +} + +// LoadUint64x8Slice loads an Uint64x8 from a slice of at least 8 uint64s +func LoadUint64x8Slice(s []uint64) Uint64x8 { + return LoadUint64x8((*[8]uint64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 uint64s +func (x Uint64x8) StoreSlice(s []uint64) { + x.Store((*[8]uint64)(s)) +} + +// LoadFloat32x16Slice loads a Float32x16 from a slice of at least 16 float32s +func LoadFloat32x16Slice(s []float32) Float32x16 { + return LoadFloat32x16((*[16]float32)(s)) +} + +// StoreSlice stores x into a slice of at least 16 float32s +func (x Float32x16) StoreSlice(s []float32) { + x.Store((*[16]float32)(s)) +} + +// LoadFloat64x8Slice loads a Float64x8 from a slice of at least 8 float64s +func LoadFloat64x8Slice(s []float64) Float64x8 { + return LoadFloat64x8((*[8]float64)(s)) +} + +// StoreSlice stores x into a slice of at least 8 float64s +func (x Float64x8) StoreSlice(s []float64) { + x.Store((*[8]float64)(s)) +}