mirror of https://github.com/golang/go.git
cmd/compile: memcombine different size stores
This CL implements the TODO in combineStores to allow combining stores of different sizes, as long as the total size aligns to 2, 4, 8. Fixes #72832. Change-Id: I6d1d471335da90d851ad8f3b5a0cf10bdcfa17c4 Reviewed-on: https://go-review.googlesource.com/c/go/+/661855 Reviewed-by: Keith Randall <khr@golang.org> Auto-Submit: Junyang Shao <shaojunyang@google.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
parent
fa42585dad
commit
113b25774e
|
|
@ -374,22 +374,19 @@ func memcombineStores(f *Func) {
|
|||
continue
|
||||
}
|
||||
|
||||
for n := f.Config.RegSize / size; n > 1; n /= 2 {
|
||||
if combineStores(v, n) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
combineStores(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to combine the n stores ending in root.
|
||||
// Returns true if successful.
|
||||
func combineStores(root *Value, n int64) bool {
|
||||
// combineStores tries to combine the stores ending in root.
|
||||
func combineStores(root *Value) {
|
||||
// Helper functions.
|
||||
maxRegSize := root.Block.Func.Config.RegSize
|
||||
type StoreRecord struct {
|
||||
store *Value
|
||||
offset int64
|
||||
size int64
|
||||
}
|
||||
getShiftBase := func(a []StoreRecord) *Value {
|
||||
x := a[0].store.Args[1]
|
||||
|
|
@ -474,86 +471,122 @@ func combineStores(root *Value, n int64) bool {
|
|||
return val.AuxInt
|
||||
}
|
||||
|
||||
// Element size of the individual stores.
|
||||
size := root.Aux.(*types.Type).Size()
|
||||
if size*n > root.Block.Func.Config.RegSize {
|
||||
return false
|
||||
}
|
||||
|
||||
// Gather n stores to look at. Check easy conditions we require.
|
||||
a := make([]StoreRecord, 0, 8)
|
||||
allMergeable := make([]StoreRecord, 0, 8)
|
||||
rbase, roff := splitPtr(root.Args[0])
|
||||
if root.Block.Func.Config.arch == "S390X" {
|
||||
// s390x can't handle unaligned accesses to global variables.
|
||||
if rbase.ptr.Op == OpAddr {
|
||||
return false
|
||||
return
|
||||
}
|
||||
}
|
||||
a = append(a, StoreRecord{root, roff})
|
||||
for i, x := int64(1), root.Args[2]; i < n; i, x = i+1, x.Args[2] {
|
||||
allMergeable = append(allMergeable, StoreRecord{root, roff, root.Aux.(*types.Type).Size()})
|
||||
allMergeableSize := root.Aux.(*types.Type).Size()
|
||||
// TODO: this loop strictly requires stores to chain together in memory.
|
||||
// maybe we can break this constraint and match more patterns.
|
||||
for i, x := 1, root.Args[2]; i < 8; i, x = i+1, x.Args[2] {
|
||||
if x.Op != OpStore {
|
||||
return false
|
||||
break
|
||||
}
|
||||
if x.Block != root.Block {
|
||||
return false
|
||||
break
|
||||
}
|
||||
if x.Uses != 1 { // Note: root can have more than one use.
|
||||
return false
|
||||
break
|
||||
}
|
||||
if x.Aux.(*types.Type).Size() != size {
|
||||
// TODO: the constant source and consecutive load source cases
|
||||
// do not need all the stores to be the same size.
|
||||
return false
|
||||
xSize := x.Aux.(*types.Type).Size()
|
||||
if xSize == 0 {
|
||||
break
|
||||
}
|
||||
if xSize > maxRegSize-allMergeableSize {
|
||||
break
|
||||
}
|
||||
base, off := splitPtr(x.Args[0])
|
||||
if base != rbase {
|
||||
return false
|
||||
break
|
||||
}
|
||||
a = append(a, StoreRecord{x, off})
|
||||
allMergeable = append(allMergeable, StoreRecord{x, off, xSize})
|
||||
allMergeableSize += xSize
|
||||
}
|
||||
// Before we sort, grab the memory arg the result should have.
|
||||
mem := a[n-1].store.Args[2]
|
||||
// Also grab position of first store (last in array = first in memory order).
|
||||
pos := a[n-1].store.Pos
|
||||
|
||||
// Sort stores in increasing address order.
|
||||
slices.SortFunc(a, func(sr1, sr2 StoreRecord) int {
|
||||
return cmp.Compare(sr1.offset, sr2.offset)
|
||||
})
|
||||
|
||||
// Check that everything is written to sequential locations.
|
||||
for i := int64(0); i < n; i++ {
|
||||
if a[i].offset != a[0].offset+i*size {
|
||||
return false
|
||||
if len(allMergeable) <= 1 {
|
||||
return
|
||||
}
|
||||
// Fit the combined total size to be one of the register size.
|
||||
mergeableSet := map[int64][]StoreRecord{}
|
||||
for i, size := 0, int64(0); i < len(allMergeable); i++ {
|
||||
size += allMergeable[i].size
|
||||
for _, bucketSize := range []int64{8, 4, 2} {
|
||||
if size == bucketSize {
|
||||
mergeableSet[size] = slices.Clone(allMergeable[:i+1])
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var a []StoreRecord
|
||||
var aTotalSize int64
|
||||
var mem *Value
|
||||
var pos src.XPos
|
||||
// Pick the largest mergeable set.
|
||||
for _, s := range []int64{8, 4, 2} {
|
||||
candidate := mergeableSet[s]
|
||||
// TODO: a refactoring might be more efficient:
|
||||
// Find a bunch of stores that are all adjacent and then decide how big a chunk of
|
||||
// those sequential stores to combine.
|
||||
if len(candidate) >= 2 {
|
||||
// Before we sort, grab the memory arg the result should have.
|
||||
mem = candidate[len(candidate)-1].store.Args[2]
|
||||
// Also grab position of first store (last in array = first in memory order).
|
||||
pos = candidate[len(candidate)-1].store.Pos
|
||||
// Sort stores in increasing address order.
|
||||
slices.SortFunc(candidate, func(sr1, sr2 StoreRecord) int {
|
||||
return cmp.Compare(sr1.offset, sr2.offset)
|
||||
})
|
||||
// Check that everything is written to sequential locations.
|
||||
sequential := true
|
||||
for i := 1; i < len(candidate); i++ {
|
||||
if candidate[i].offset != candidate[i-1].offset+candidate[i-1].size {
|
||||
sequential = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if sequential {
|
||||
a = candidate
|
||||
aTotalSize = s
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(a) <= 1 {
|
||||
return
|
||||
}
|
||||
// Memory location we're going to write at (the lowest one).
|
||||
ptr := a[0].store.Args[0]
|
||||
|
||||
// Check for constant stores
|
||||
isConst := true
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
switch a[i].store.Args[1].Op {
|
||||
case OpConst32, OpConst16, OpConst8, OpConstBool:
|
||||
default:
|
||||
isConst = false
|
||||
}
|
||||
if !isConst {
|
||||
break
|
||||
}
|
||||
}
|
||||
if isConst {
|
||||
// Modify root to do all the stores.
|
||||
var c int64
|
||||
mask := int64(1)<<(8*size) - 1
|
||||
for i := int64(0); i < n; i++ {
|
||||
s := 8 * size * int64(i)
|
||||
for i := range a {
|
||||
mask := int64(1)<<(8*a[i].size) - 1
|
||||
s := 8 * (a[i].offset - a[0].offset)
|
||||
if root.Block.Func.Config.BigEndian {
|
||||
s = 8*size*(n-1) - s
|
||||
s = aTotalSize*8 - a[i].size - s
|
||||
}
|
||||
c |= (a[i].store.Args[1].AuxInt & mask) << s
|
||||
}
|
||||
var cv *Value
|
||||
switch size * n {
|
||||
switch aTotalSize {
|
||||
case 2:
|
||||
cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
|
||||
case 4:
|
||||
|
|
@ -563,7 +596,7 @@ func combineStores(root *Value, n int64) bool {
|
|||
}
|
||||
|
||||
// Move all the stores to the root.
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
v := a[i].store
|
||||
if v == root {
|
||||
v.Aux = cv.Type // widen store type
|
||||
|
|
@ -576,14 +609,14 @@ func combineStores(root *Value, n int64) bool {
|
|||
v.Type = types.Types[types.TBOOL] // erase memory type
|
||||
}
|
||||
}
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
// Check for consecutive loads as the source of the stores.
|
||||
var loadMem *Value
|
||||
var loadBase BaseAddress
|
||||
var loadIdx int64
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
load := a[i].store.Args[1]
|
||||
if load.Op != OpLoad {
|
||||
loadMem = nil
|
||||
|
|
@ -622,7 +655,7 @@ func combineStores(root *Value, n int64) bool {
|
|||
if loadMem != nil {
|
||||
// Modify the first load to do a larger load instead.
|
||||
load := a[0].store.Args[1]
|
||||
switch size * n {
|
||||
switch aTotalSize {
|
||||
case 2:
|
||||
load.Type = types.Types[types.TUINT16]
|
||||
case 4:
|
||||
|
|
@ -632,7 +665,7 @@ func combineStores(root *Value, n int64) bool {
|
|||
}
|
||||
|
||||
// Modify root to do the store.
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
v := a[i].store
|
||||
if v == root {
|
||||
v.Aux = load.Type // widen store type
|
||||
|
|
@ -645,45 +678,47 @@ func combineStores(root *Value, n int64) bool {
|
|||
v.Type = types.Types[types.TBOOL] // erase memory type
|
||||
}
|
||||
}
|
||||
return true
|
||||
return
|
||||
}
|
||||
|
||||
// Check that all the shift/trunc are of the same base value.
|
||||
shiftBase := getShiftBase(a)
|
||||
if shiftBase == nil {
|
||||
return false
|
||||
return
|
||||
}
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
if !isShiftBase(a[i].store, shiftBase) {
|
||||
return false
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Check for writes in little-endian or big-endian order.
|
||||
isLittleEndian := true
|
||||
shift0 := shift(a[0].store, shiftBase)
|
||||
for i := int64(1); i < n; i++ {
|
||||
if shift(a[i].store, shiftBase) != shift0+i*size*8 {
|
||||
for i := 1; i < len(a); i++ {
|
||||
if shift(a[i].store, shiftBase) != shift0+(a[i].offset-a[0].offset)*8 {
|
||||
isLittleEndian = false
|
||||
break
|
||||
}
|
||||
}
|
||||
isBigEndian := true
|
||||
for i := int64(1); i < n; i++ {
|
||||
if shift(a[i].store, shiftBase) != shift0-i*size*8 {
|
||||
shiftedSize := int64(0)
|
||||
for i := 1; i < len(a); i++ {
|
||||
shiftedSize += a[i].size
|
||||
if shift(a[i].store, shiftBase) != shift0-shiftedSize*8 {
|
||||
isBigEndian = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if !isLittleEndian && !isBigEndian {
|
||||
return false
|
||||
return
|
||||
}
|
||||
|
||||
// Check to see if we need byte swap before storing.
|
||||
needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
|
||||
isBigEndian && !root.Block.Func.Config.BigEndian
|
||||
if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
|
||||
return false
|
||||
if needSwap && (int64(len(a)) != aTotalSize || !root.Block.Func.Config.haveByteSwap(aTotalSize)) {
|
||||
return
|
||||
}
|
||||
|
||||
// This is the commit point.
|
||||
|
|
@ -693,18 +728,19 @@ func combineStores(root *Value, n int64) bool {
|
|||
if isLittleEndian && shift0 != 0 {
|
||||
sv = rightShift(root.Block, root.Pos, sv, shift0)
|
||||
}
|
||||
if isBigEndian && shift0-(n-1)*size*8 != 0 {
|
||||
sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*size*8)
|
||||
shiftedSize = int64(aTotalSize - a[0].size)
|
||||
if isBigEndian && shift0-shiftedSize*8 != 0 {
|
||||
sv = rightShift(root.Block, root.Pos, sv, shift0-shiftedSize*8)
|
||||
}
|
||||
if sv.Type.Size() > size*n {
|
||||
sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)
|
||||
if sv.Type.Size() > aTotalSize {
|
||||
sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), aTotalSize)
|
||||
}
|
||||
if needSwap {
|
||||
sv = byteSwap(root.Block, root.Pos, sv)
|
||||
}
|
||||
|
||||
// Move all the stores to the root.
|
||||
for i := int64(0); i < n; i++ {
|
||||
for i := range a {
|
||||
v := a[i].store
|
||||
if v == root {
|
||||
v.Aux = sv.Type // widen store type
|
||||
|
|
@ -717,7 +753,6 @@ func combineStores(root *Value, n int64) bool {
|
|||
v.Type = types.Types[types.TBOOL] // erase memory type
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func sizeType(size int64) *types.Type {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
// asmcheck
|
||||
|
||||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package codegen
|
||||
|
||||
type tile1 struct {
|
||||
a uint16
|
||||
b uint16
|
||||
c uint32
|
||||
}
|
||||
|
||||
func store_tile1(t *tile1) {
|
||||
// amd64:`MOVQ`
|
||||
t.a, t.b, t.c = 1, 1, 1
|
||||
}
|
||||
|
||||
type tile2 struct {
|
||||
a, b, c, d, e int8
|
||||
}
|
||||
|
||||
func store_tile2(t *tile2) {
|
||||
// amd64:`MOVW`
|
||||
t.a, t.b = 1, 1
|
||||
// amd64:`MOVW`
|
||||
t.d, t.e = 1, 1
|
||||
}
|
||||
|
||||
type tile3 struct {
|
||||
a, b uint8
|
||||
c uint16
|
||||
}
|
||||
|
||||
func store_shifted(t *tile3, x uint32) {
|
||||
// amd64:`MOVL`
|
||||
t.a = uint8(x)
|
||||
t.b = uint8(x >> 8)
|
||||
t.c = uint16(x >> 16)
|
||||
}
|
||||
Loading…
Reference in New Issue