mirror of https://github.com/golang/go.git
cmd/compile: introduce separate memory op combining pass
Memory op combining is currently done using arch-specific rewrite rules. Instead, do them as a arch-independent rewrite pass. This ensures that all architectures (with unaligned loads & stores) get equal treatment. This removes a lot of rewrite rules. The new pass is a bit more comprehensive. It handles things like out-of-order writes and is careful not to apply partial optimizations that then block further optimizations. Change-Id: I780ff3bb052475cd725a923309616882d25b8d9e Reviewed-on: https://go-review.googlesource.com/c/go/+/478475 Reviewed-by: Keith Randall <khr@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Keith Randall <khr@golang.org> Reviewed-by: David Chase <drchase@google.com>
parent e9c2607ab4
commit cedf5008a8
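
Illustration (not part of the commit): the deleted rules below all pattern-match byte-at-a-time memory accesses and fuse them into wider operations. This is the kind of Go source whose generated loads the new pass, like the old rules, is meant to combine:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	b := []byte{0x78, 0x56, 0x34, 0x12}
	// encoding/binary assembles the word byte by byte; the compiler
	// is expected to fuse the four byte loads into one 32-bit load
	// on architectures that permit unaligned loads.
	fmt.Printf("%#x\n", binary.LittleEndian.Uint32(b)) // 0x12345678
}
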
src/cmd/compile/internal/ssa/_gen/386.rules
@@ -52,6 +52,7 @@
(OffPtr [off] ptr) => (ADDLconst [int32(off)] ptr)

(Bswap32 ...) => (BSWAPL ...)
(Bswap16 x) => (ROLWconst [8] x)

(Sqrt ...) => (SQRTSD ...)
(Sqrt32 ...) => (SQRTSS ...)

@@ -918,160 +919,6 @@
// Convert LEAL1 back to ADDL if we can
(LEAL1 [0] {nil} x y) => (ADDL x y)

// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.
(ORL x0:(MOVBload [i0] {s} p mem)
    s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, s0)
  => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)

(ORL x0:(MOVBload [i] {s} p0 mem)
    s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, s0)
  => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)

(ORL o0:(ORL
        x0:(MOVWload [i0] {s} p mem)
        s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem)))
    s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
  && i2 == i0+2
  && i3 == i0+3
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, s0, s1, o0)
  => @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)

(ORL o0:(ORL
        x0:(MOVWload [i] {s} p0 mem)
        s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem)))
    s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && o0.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && sequentialAddresses(p1, p2, 1)
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, s0, s1, o0)
  => @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)

// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
  && x.Uses == 1
  && a.Off() + 1 == c.Off()
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
  && x.Uses == 1
  && a.Off() + 1 == c.Off()
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)

(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && a.Off() == c.Off()
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && a.Off() == c.Off()
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)

(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
  && x.Uses == 1
  && a.Off() + 2 == c.Off()
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
  && x.Uses == 1
  && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)

(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && a.Off() == c.Off()
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && a.Off() == c.Off()
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
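
The makeValAndOff results above pack two adjacent byte constants into one little-endian 16-bit immediate. A standalone sketch of that arithmetic (illustrative only, not compiler code):

package main

import "fmt"

// mergeByteConsts mirrors the expression a.Val()&0xff | c.Val()<<8:
// a is the constant stored at the lower offset (the low byte) and c
// the constant stored one byte higher (the high byte).
func mergeByteConsts(a, c int32) int32 {
	return a&0xff | c<<8
}

func main() {
	// Storing 0x34 at offset n and 0x12 at offset n+1 equals one
	// 16-bit store of 0x1234 at offset n (little-endian).
	fmt.Printf("%#x\n", mergeByteConsts(0x34, 0x12)) // 0x1234
}
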
// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHR(W|L)const [8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w0 mem)

(MOVBstore [i] {s} p1 (SHR(W|L)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHR(W|L)const [8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w0 mem)

(MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w0 mem)

(MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w0 mem)

// For PIC, break floating-point constant loading into two instructions so we have
// a register to use for holding the address of the constant pool entry.
(MOVSSconst [c]) && config.ctxt.Flag_shared => (MOVSSconst2 (MOVSSconst1 [c]))

src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -110,6 +110,7 @@
(BitLen8 <t> x) && buildcfg.GOAMD64 >= 3 => (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))

(Bswap(64|32) ...) => (BSWAP(Q|L) ...)
(Bswap16 x) => (ROLWconst [8] x)

(PopCount(64|32) ...) => (POPCNT(Q|L) ...)
(PopCount16 x) => (POPCNTL (MOVWQZX <typ.UInt32> x))

@@ -1469,397 +1470,6 @@
// Convert LEAQ1 back to ADDQ if we can
(LEAQ1 [0] x y) && v.Aux == nil => (ADDQ x y)

// Combining byte loads into larger (unaligned) loads.
// There are many ways these combinations could occur. This is
// designed to match the way encoding/binary.LittleEndian does it.

// Little-endian loads

(OR(L|Q) x0:(MOVBload [i0] {s} p mem)
    sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)

(OR(L|Q) x0:(MOVBload [i] {s} p0 mem)
    sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)

(OR(L|Q) x0:(MOVWload [i0] {s} p mem)
    sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem)))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)

(OR(L|Q) x0:(MOVWload [i] {s} p0 mem)
    sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)

(ORQ x0:(MOVLload [i0] {s} p mem)
    sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)

(ORQ x0:(MOVLload [i] {s} p0 mem)
    sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
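
Many rules above and below guard on sequentialAddresses(p0, p1, n), a helper in the SSA rewrite support code that reports whether p1 computes an address exactly n bytes past p0. A toy model of the idea (an assumption-laden sketch over plain structs; the real helper pattern-matches SSA address computations, not this struct):

package main

import "fmt"

// addr is a toy stand-in for an SSA value computing base+offset.
type addr struct {
	base *addr // underlying pointer value; nil for a root pointer
	off  int64 // constant byte offset from base
}

// sequentialAddrs models sequentialAddresses(p0, p1, n): it reports
// whether p1 is exactly n bytes past p0, the condition under which
// two narrow memory ops at distinct pointers may merge into one
// wide op.
func sequentialAddrs(p0, p1 *addr, n int64) bool {
	return p0.base == p1.base && p1.off == p0.off+n
}

func main() {
	p := &addr{off: 8}
	q := &addr{off: 9}
	fmt.Println(sequentialAddrs(p, q, 1)) // true
}
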
(OR(L|Q)
    s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
    or:(OR(L|Q)
        s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)

(OR(L|Q)
    s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
    or:(OR(L|Q)
        s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
        y))
  && j1 == j0+8
  && j0 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)

(ORQ
    s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
        y))
  && i1 == i0+2
  && j1 == j0+16
  && j0 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)

(ORQ
    s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem))
    or:(ORQ
        s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem))
        y))
  && j1 == j0+16
  && j0 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)

// Big-endian loads

(OR(L|Q)
    x1:(MOVBload [i1] {s} p mem)
    sh:(SHL(L|Q)const [8] x0:(MOVBload [i0] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))

(OR(L|Q)
    x1:(MOVBload [i] {s} p1 mem)
    sh:(SHL(L|Q)const [8] x0:(MOVBload [i] {s} p0 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))

(OR(L|Q)
    r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
    sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
  && i1 == i0+2
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, r0, r1, sh)
  => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))

(OR(L|Q)
    r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))
    sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, r0, r1, sh)
  => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))

(ORQ
    r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
    sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
  && i1 == i0+4
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, r0, r1, sh)
  => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))

(ORQ
    r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem))
    sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, r0, r1, sh)
  => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))

(OR(L|Q)
    s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
    or:(OR(L|Q)
        s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
        y))
  && i1 == i0+1
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)

(OR(L|Q)
    s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
    or:(OR(L|Q)
        s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
        y))
  && j1 == j0-8
  && j1 % 16 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)

(ORQ
    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
    or:(ORQ
        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
        y))
  && i1 == i0+2
  && j1 == j0-16
  && j1 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, r0, r1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)

(ORQ
    s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))
    or:(ORQ
        s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)))
        y))
  && j1 == j0-16
  && j1 % 32 == 0
  && x0.Uses == 1
  && x1.Uses == 1
  && r0.Uses == 1
  && r1.Uses == 1
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, r0, r1, s0, s1, or)
  => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
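
The big-endian patterns above are what encoding/binary.BigEndian reads compile down to: the byte shuffle becomes one wide load plus a byte swap. Illustrative example (not part of the commit):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	b := []byte{0x12, 0x34, 0x56, 0x78}
	// On amd64 this compiles to one 32-bit load followed by a BSWAPL
	// (or a MOVBE load on CPUs that have it), not four byte loads.
	fmt.Printf("%#x\n", binary.BigEndian.Uint32(b)) // 0x12345678
}
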
// Combine 2 byte stores + shift into rolw 8 + word store
(MOVBstore [i] {s} p w
    x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
  && x0.Uses == 1
  && clobber(x0)
  => (MOVWstore [i-1] {s} p (ROLWconst <typ.UInt16> [8] w) mem)
(MOVBstore [i] {s} p1 w
    x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
  && x0.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x0)
  => (MOVWstore [i] {s} p0 (ROLWconst <typ.UInt16> [8] w) mem)

// Combine stores + shifts into bswap and larger (unaligned) stores
(MOVBstore [i] {s} p w
    x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
    x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
    x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && clobber(x0, x1, x2)
  => (MOVLstore [i-3] {s} p (BSWAPL <typ.UInt32> w) mem)
(MOVBstore [i] {s} p3 w
    x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w)
    x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w)
    x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && sequentialAddresses(p1, p2, 1)
  && sequentialAddresses(p2, p3, 1)
  && clobber(x0, x1, x2)
  => (MOVLstore [i] {s} p0 (BSWAPL <typ.UInt32> w) mem)

(MOVBstore [i] {s} p w
    x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
    x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
    x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
    x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
    x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
    x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
    x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && clobber(x0, x1, x2, x3, x4, x5, x6)
  => (MOVQstore [i-7] {s} p (BSWAPQ <typ.UInt64> w) mem)
(MOVBstore [i] {s} p7 w
    x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w)
    x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w)
    x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w)
    x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w)
    x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w)
    x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w)
    x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
  && x0.Uses == 1
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && x4.Uses == 1
  && x5.Uses == 1
  && x6.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && sequentialAddresses(p1, p2, 1)
  && sequentialAddresses(p2, p3, 1)
  && sequentialAddresses(p3, p4, 1)
  && sequentialAddresses(p4, p5, 1)
  && sequentialAddresses(p5, p6, 1)
  && sequentialAddresses(p6, p7, 1)
  && clobber(x0, x1, x2, x3, x4, x5, x6)
  => (MOVQstore [i] {s} p0 (BSWAPQ <typ.UInt64> w) mem)
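
These store patterns correspond to encoding/binary.BigEndian writes: eight single-byte stores of shifted values collapse into one byte swap plus one 64-bit store. Illustrative example (not part of the commit):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	var b [8]byte
	// On amd64 this compiles to a BSWAPQ plus a single 64-bit store
	// (or a MOVBE store), rather than eight one-byte stores.
	binary.BigEndian.PutUint64(b[:], 0x0102030405060708)
	fmt.Println(b) // [1 2 3 4 5 6 7 8]
}
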
// Combine constant stores into larger (unaligned) stores.
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
  && clobber(x)
  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
  && clobber(x)
  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
  && clobber(x)
  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
(MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
  && clobber(x)
  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
  && config.useSSE
  && x.Uses == 1

@@ -1877,86 +1487,6 @@
  && clobber(x)
  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)

// Combine stores into larger (unaligned) stores. Little endian.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
  => (MOVWstore [i] {s} p0 w0 mem)

(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVLstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
  => (MOVLstore [i] {s} p0 w0 mem)

(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVQstore [i-4] {s} p w0 mem)
(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
  => (MOVQstore [i] {s} p0 w mem)
(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
  => (MOVQstore [i] {s} p0 w0 mem)

(MOVBstore [c3] {s} p3 (SHRQconst [56] w)
    x1:(MOVWstore [c2] {s} p2 (SHRQconst [40] w)
    x2:(MOVLstore [c1] {s} p1 (SHRQconst [8] w)
    x3:(MOVBstore [c0] {s} p0 w mem))))
  && x1.Uses == 1
  && x2.Uses == 1
  && x3.Uses == 1
  && sequentialAddresses(p0, p1, int64(1 + c0 - c1))
  && sequentialAddresses(p0, p2, int64(5 + c0 - c2))
  && sequentialAddresses(p0, p3, int64(7 + c0 - c3))
  && clobber(x1, x2, x3)
  => (MOVQstore [c0] {s} p0 w mem)

(MOVBstore [i] {s} p
    x1:(MOVBload [j] {s2} p2 mem)
    mem2:(MOVBstore [i-1] {s} p

@@ -2195,26 +1725,6 @@
(MOVWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 && buildcfg.GOAMD64 >= 3 => (MOVBEWstore [i] {s} p w mem)
(MOVBEWstore [i] {s} p x:(ROLWconst [8] w) mem) && x.Uses == 1 => (MOVWstore [i] {s} p w mem)

(ORQ x0:(MOVBELload [i0] {s} p mem)
    sh:(SHLQconst [32] x1:(MOVBELload [i1] {s} p mem)))
  && i0 == i1+4
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVBEQload [i1] {s} p mem)

(ORQ x0:(MOVBELload [i] {s} p0 mem)
    sh:(SHLQconst [32] x1:(MOVBELload [i] {s} p1 mem)))
  && x0.Uses == 1
  && x1.Uses == 1
  && sh.Uses == 1
  && sequentialAddresses(p1, p0, 4)
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, sh)
  => @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)

(SAR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)

src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -103,6 +103,7 @@
(Bswap64 ...) => (REV ...)
(Bswap32 ...) => (REVW ...)
(Bswap16 ...) => (REV16W ...)

(BitRev64 ...) => (RBIT ...)
(BitRev32 ...) => (RBITW ...)

@@ -481,6 +482,9 @@
  (STP [16] dst (Select0 <typ.UInt64> (LDP [16] src mem)) (Select1 <typ.UInt64> (LDP [16] src mem))
    (STP dst (Select0 <typ.UInt64> (LDP src mem)) (Select1 <typ.UInt64> (LDP src mem)) mem))))

(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i+8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i] ptr mem)
(MOVDstorezero {s} [i] ptr x:(MOVDstorezero {s} [i-8] ptr mem)) && x.Uses == 1 && clobber(x) => (MOVQstorezero {s} [i-8] ptr mem)
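
The two rules above merge adjacent 8-byte zero stores into one 16-byte zero store, whichever order the stores appear in. Illustrative source pattern (not part of the commit; on arm64 the wide zero store is an STP of two zero registers):

package main

import "fmt"

type pair struct {
	a, b uint64
}

// clear2 issues two adjacent 8-byte zeroing stores; the compiler can
// combine them into a single 16-byte zero store.
func clear2(p *pair) {
	p.a = 0
	p.b = 0
}

func main() {
	p := pair{1, 2}
	clear2(&p)
	fmt.Println(p) // {0 0}
}
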

// strip off fractional word move
(Move [s] dst src mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
  (Move [8]

@@ -1927,918 +1931,6 @@
(ORshiftRL [rc] (ANDconst [ac] y) (SLLconst [lc] x)) && lc < rc && ac == ^((1<<uint(64-rc)-1))
  => (BFXIL [armBFAuxInt(rc-lc, 64-rc)] y x)

// do combined loads
// little endian loads
// b[0] | b[1]<<8 => load 16-bit
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
    y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    x0:(MOVHUload [i0] {s} p mem)
    y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i3] {s} p mem)))
  && i2 == i0+2
  && i3 == i0+3
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    x0:(MOVHUloadidx ptr0 idx0 mem)
    y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    x0:(MOVHUloadidx ptr idx mem)
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    x0:(MOVHUloadidx2 ptr0 idx0 mem)
    y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    x0:(MOVWUload [i0] {s} p mem)
    y1:(MOVDnop x1:(MOVBUload [i4] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i6] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [i7] {s} p mem)))
  && i4 == i0+4
  && i5 == i0+5
  && i6 == i0+6
  && i7 == i0+7
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    x0:(MOVWUloadidx ptr0 idx0 mem)
    y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    x0:(MOVWUloadidx4 ptr0 idx0 mem)
    y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    x0:(MOVWUloadidx ptr idx mem)
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
    y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && clobber(x0, x1, x2, x3, x4, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)

// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 32-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUload [i3] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [i2] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i1] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i0] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && i3 == i0+3
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)

// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] => load 64-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i5] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i4] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [i3] {s} p mem)))
    y5:(MOVDnop x5:(MOVBUload [i2] {s} p mem)))
    y6:(MOVDnop x6:(MOVBUload [i1] {s} p mem)))
    y7:(MOVDnop x7:(MOVBUload [i0] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && i3 == i0+3
  && i4 == i0+4
  && i5 == i0+5
  && i6 == i0+6
  && i7 == i0+7
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
    y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
    y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
    y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
    y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)

// big endian loads
// b[1] | b[0]<<8 => load 16-bit, reverse
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUload [i1] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [i0] {s} p mem)))
  && i1 == i0+1
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
    y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [8]
    y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
    y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
  && x0.Uses == 1 && x1.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1
  && mergePoint(b,x0,x1) != nil
  && clobber(x0, x1, y0, y1)
  => @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
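
These are the arm64 equivalents for big-endian 16-bit reads: the two byte loads fuse into one halfword load plus a REV16W byte reversal. Illustrative example (not part of the commit):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	b := []byte{0x12, 0x34}
	// On arm64 this compiles to a single MOVHU load followed by a
	// REV16W, not two byte loads plus a shift and OR.
	fmt.Printf("%#x\n", binary.BigEndian.Uint16(b)) // 0x1234
}
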
// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit, reverse
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    y0:(REV16W x0:(MOVHUload [i2] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i0] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, y0, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    y0:(REV16W x0:(MOVHUload [2] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, y0, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
    y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
  && o0.Uses == 1
  && mergePoint(b,x0,x1,x2) != nil
  && clobber(x0, x1, x2, y0, y1, y2, o0)
  => @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))

// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 64-bit, reverse
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    y0:(REVW x0:(MOVWUload [i4] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [i3] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i1] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [i0] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && i3 == i0+3
  && i4 == i0+4
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    y0:(REVW x0:(MOVWUload [4] {s} p mem))
    y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
    y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4) != nil
  && clobber(x0, x1, x2, x3, x4, y0, y1, y2, y3, y4, o0, o1, o2)
  => @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))

// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && i3 == i0+3
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
    y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
    y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3) != nil
  && clobber(x0, x1, x2, x3, y0, y1, y2, y3, o0, o1, s0)
  => @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUload [i0] {s} p mem)))
    y1:(MOVDnop x1:(MOVBUload [i1] {s} p mem)))
    y2:(MOVDnop x2:(MOVBUload [i2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [i3] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [i4] {s} p mem)))
    y5:(MOVDnop x5:(MOVBUload [i5] {s} p mem)))
    y6:(MOVDnop x6:(MOVBUload [i6] {s} p mem)))
    y7:(MOVDnop x7:(MOVBUload [i7] {s} p mem)))
  && i1 == i0+1
  && i2 == i0+2
  && i3 == i0+3
  && i4 == i0+4
  && i5 == i0+5
  && i6 == i0+6
  && i7 == i0+7
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [int64(i0)] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
    y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
    y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
    y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
    y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
    y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
    y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
    y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
  && s == nil
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && isSamePtr(p1, p)
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
    y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
    y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
    y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
    y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
    y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
    y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
    y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
    y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
  && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
  && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
  && y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
  && y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
  && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
  && o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
  && mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
  && clobber(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7, o0, o1, o2, o3, o4, o5, s0)
  => @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))

// Combine zero stores into larger (unaligned) stores.
(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
  && x.Uses == 1
  && areAdjacentOffsets(int64(i),int64(j),1)
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVHstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVHstorezeroidx ptr1 idx1 mem)
(MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVHstorezeroidx ptr idx mem)
(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
  && x.Uses == 1
  && areAdjacentOffsets(int64(i),int64(j),2)
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVWstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVWstorezeroidx ptr1 idx1 mem)
(MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVWstorezeroidx ptr idx mem)
(MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  && clobber(x)
  => (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
  && x.Uses == 1
  && areAdjacentOffsets(int64(i),int64(j),4)
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVDstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVDstorezeroidx ptr1 idx1 mem)
(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  && clobber(x)
  => (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
  && x.Uses == 1
  && areAdjacentOffsets(int64(i),int64(j),8)
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVQstorezero [int32(min(int64(i),int64(j)))] {s} ptr0 mem)
(MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVQstorezero [0] {s} p0 mem)
(MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
  && x.Uses == 1
  && s == nil
  && isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
  && clobber(x)
  => (MOVQstorezero [0] {s} p0 mem)

// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  && x.Uses == 1
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
  && x.Uses == 1
  && clobber(x)
  => (MOVHstoreidx ptr idx w mem)
(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  && x.Uses == 1
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
  && x.Uses == 1
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
  && x.Uses == 1
  && isSamePtr(ptr0, ptr1)
  && clobber(x)
  => (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
  && x.Uses == 1
  && s == nil
  && (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
  && clobber(x)
  => (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr0 w0 mem)
|
||||
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr1 idx1 w0 mem)
|
||||
(MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
|
||||
&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
|
||||
&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr0 w0 mem)
|
||||
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& bfc.getARM64BFwidth() == 32 - bfc.getARM64BFlsb()
|
||||
&& bfc2.getARM64BFwidth() == 32 - bfc2.getARM64BFlsb()
|
||||
&& bfc2.getARM64BFlsb() == bfc.getARM64BFlsb() - 8
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr1 idx1 w0 mem)
|
||||
(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr0 w0 mem)
|
||||
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr1 idx1 w0 mem)
|
||||
(MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstore [i-2] {s} ptr0 w mem)
|
||||
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 idx1 w mem)
|
||||
(MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr idx w mem)
|
||||
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
|
||||
(MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstore [i-2] {s} ptr0 w mem)
|
||||
(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 idx1 w mem)
|
||||
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
|
||||
(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstore [i-2] {s} ptr0 w mem)
|
||||
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 idx1 w mem)
|
||||
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
|
||||
(MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstore [i-2] {s} ptr0 w0 mem)
|
||||
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 idx1 w0 mem)
|
||||
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
|
||||
(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVDstore [i-4] {s} ptr0 w mem)
|
||||
(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVDstoreidx ptr1 idx1 w mem)
|
||||
(MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVDstoreidx ptr idx w mem)
|
||||
(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
|
||||
(MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& isSamePtr(ptr0, ptr1)
|
||||
&& clobber(x)
|
||||
=> (MOVDstore [i-4] {s} ptr0 w0 mem)
|
||||
(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVDstoreidx ptr1 idx1 w0 mem)
|
||||
(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
|
||||
&& clobber(x)
|
||||
=> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
|
||||
(MOVBstore [i] {s} ptr w
|
||||
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
|
||||
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
|
||||
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
|
||||
x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
|
||||
x4:(MOVBstore [i-5] {s} ptr (SRLconst [40] w)
|
||||
x5:(MOVBstore [i-6] {s} ptr (SRLconst [48] w)
|
||||
x6:(MOVBstore [i-7] {s} ptr (SRLconst [56] w) mem))))))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& x3.Uses == 1
|
||||
&& x4.Uses == 1
|
||||
&& x5.Uses == 1
|
||||
&& x6.Uses == 1
|
||||
&& clobber(x0, x1, x2, x3, x4, x5, x6)
|
||||
=> (MOVDstore [i-7] {s} ptr (REV <typ.UInt64> w) mem)
|
||||
(MOVBstore [7] {s} p w
|
||||
x0:(MOVBstore [6] {s} p (SRLconst [8] w)
|
||||
x1:(MOVBstore [5] {s} p (SRLconst [16] w)
|
||||
x2:(MOVBstore [4] {s} p (SRLconst [24] w)
|
||||
x3:(MOVBstore [3] {s} p (SRLconst [32] w)
|
||||
x4:(MOVBstore [2] {s} p (SRLconst [40] w)
|
||||
x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
|
||||
x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& x3.Uses == 1
|
||||
&& x4.Uses == 1
|
||||
&& x5.Uses == 1
|
||||
&& x6.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& isSamePtr(p1, p)
|
||||
&& clobber(x0, x1, x2, x3, x4, x5, x6)
|
||||
=> (MOVDstoreidx ptr0 idx0 (REV <typ.UInt64> w) mem)
|
||||
(MOVBstore [i] {s} ptr w
|
||||
x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
|
||||
x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
|
||||
x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstore [3] {s} p w
|
||||
x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
|
||||
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
|
||||
x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& isSamePtr(p1, p)
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstoreidx ptr (ADDconst [3] idx) w
|
||||
x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
|
||||
x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
|
||||
x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstoreidx ptr idx (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstoreidx ptr idx w
|
||||
x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
|
||||
x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
|
||||
x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstoreidx ptr idx w mem)
|
||||
(MOVBstore [i] {s} ptr w
|
||||
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
|
||||
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
|
||||
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstore [3] {s} p w
|
||||
x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
|
||||
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
|
||||
x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& isSamePtr(p1, p)
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstore [i] {s} ptr w
|
||||
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
|
||||
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
|
||||
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstore [i-3] {s} ptr (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstore [3] {s} p w
|
||||
x0:(MOVBstore [2] {s} p (SRLconst [8] w)
|
||||
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
|
||||
x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
|
||||
&& x0.Uses == 1
|
||||
&& x1.Uses == 1
|
||||
&& x2.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& isSamePtr(p1, p)
|
||||
&& clobber(x0, x1, x2)
|
||||
=> (MOVWstoreidx ptr0 idx0 (REVW <typ.UInt32> w) mem)
|
||||
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr idx (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr idx w mem)
|
||||
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& clobber(x)
|
||||
=> (MOVHstore [i-1] {s} ptr (REV16W <typ.UInt16> w) mem)
|
||||
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
|
||||
&& x.Uses == 1
|
||||
&& s == nil
|
||||
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
|
||||
&& clobber(x)
|
||||
=> (MOVHstoreidx ptr0 idx0 (REV16W <typ.UInt16> w) mem)
|
||||
|
||||
// FP simplification
|
||||
(FNEGS (FMULS x y)) => (FNMULS x y)
|
||||
(FNEGD (FMULD x y)) => (FNMULD x y)
|
||||
|
|
|
|||
|
|
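For reference, the byte-reversed store rules above target byte-store sequences of the shape used by encoding/binary; a minimal Go sketch of the big-endian 32-bit case (the helper name is hypothetical, shown only to illustrate the pattern the REVW rewrites match):

// On little-endian arm64, these four byte stores match the
// MOVBstore chains above and become a single MOVWstore of a
// byte-reversed value (REVW w).
func putUint32BE(b []byte, v uint32) {
    _ = b[3] // bounds check hint
    b[0] = byte(v >> 24)
    b[1] = byte(v >> 16)
    b[2] = byte(v >> 8)
    b[3] = byte(v)
}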
@@ -845,8 +845,8 @@
(MOVWstoreidx ptr idx (MOV(W|WZ)reg x) mem) => (MOVWstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (SRWconst (MOV(H|HZ)reg x) [c]) mem) && c <= 8 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem)
(MOVBstoreidx ptr idx (SRWconst (MOV(W|WZ)reg x) [c]) mem) && c <= 24 => (MOVBstoreidx ptr idx (SRWconst <typ.UInt32> x [c]) mem)
(MOVHBRstore {sym} ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHBRstore {sym} ptr x mem)
(MOVWBRstore {sym} ptr (MOV(W|WZ)reg x) mem) => (MOVWBRstore {sym} ptr x mem)
(MOVHBRstore ptr (MOV(H|HZ|W|WZ)reg x) mem) => (MOVHBRstore ptr x mem)
(MOVWBRstore ptr (MOV(W|WZ)reg x) mem) => (MOVWBRstore ptr x mem)

// Lose W-widening ops fed to compare-W
(CMP(W|WU) x (MOV(W|WZ)reg y)) => (CMP(W|WU) x y)

@@ -958,307 +958,6 @@
(F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
(F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)

// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
// and convert the statements in these functions from multiple single byte loads or stores to
// the single largest possible load or store.
// Some are marked big or little endian based on the order in which the bytes are loaded or stored,
// not on the ordering of the machine. These are intended for little endian machines.
// To implement for big endian machines, most rules would have to be duplicated but the
// resulting rule would be reversed, i.e., MOVHZload on little endian would be MOVHBRload on big endian
// and vice versa.
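As a concrete reference, the load half of that pattern has the shape of the standard library's little-endian accessor; a sketch (function name hypothetical, package clause omitted):

// Shape of the "b[0] | b[1]<<8" expression the first rule below matches.
func leUint16(b []byte) uint16 {
    _ = b[1] // bounds check hint
    return uint16(b[0]) | uint16(b[1])<<8
}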
// b[0] | b[1]<<8 => load 16-bit Little endian
(OR <t> x0:(MOVBZload [i0] {s} p mem)
    o1:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [8]))
    && !config.BigEndian
    && i1 == i0+1
    && x0.Uses == 1 && x1.Uses == 1
    && o1.Uses == 1
    && mergePoint(b, x0, x1) != nil
    && clobber(x0, x1, o1)
    => @mergePoint(b,x0,x1) (MOVHZload <t> {s} [i0] p mem)

// b[0]<<8 | b[1] => load 16-bit Big endian on Little endian arch.
// Use byte-reverse indexed load for 2 bytes.
(OR <t> x0:(MOVBZload [i1] {s} p mem)
    o1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [8]))
    && !config.BigEndian
    && i1 == i0+1
    && x0.Uses == 1 && x1.Uses == 1
    && o1.Uses == 1
    && mergePoint(b, x0, x1) != nil
    && clobber(x0, x1, o1)
    => @mergePoint(b,x0,x1) (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<n+8 | b[1]<<n => load 16-bit Big endian (where n%8 == 0)
// Use byte-reverse indexed load for 2 bytes,
// then shift left to the correct position. Used to match subrules
// from longer rules.
(OR <t> s0:(SL(W|D)const x0:(MOVBZload [i1] {s} p mem) [n1])
    s1:(SL(W|D)const x1:(MOVBZload [i0] {s} p mem) [n2]))
    && !config.BigEndian
    && i1 == i0+1
    && n1%8 == 0
    && n2 == n1+8
    && x0.Uses == 1 && x1.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1
    && mergePoint(b, x0, x1) != nil
    && clobber(x0, x1, s0, s1)
    => @mergePoint(b,x0,x1) (SLDconst <t> (MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [n1])

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 => load 32-bit Little endian
// Use byte-reverse indexed load for 4 bytes.
(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i3] {s} p mem) [24])
    o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [16])
    x0:(MOVHZload [i0] {s} p mem)))
    && !config.BigEndian
    && i2 == i0+2
    && i3 == i0+3
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && o0.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1
    && mergePoint(b, x0, x1, x2) != nil
    && clobber(x0, x1, x2, s0, s1, o0)
    => @mergePoint(b,x0,x1,x2) (MOVWZload <t> {s} [i0] p mem)

// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] => load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Could be used to match subrules of a longer rule.
(OR <t> s1:(SL(W|D)const x2:(MOVBZload [i0] {s} p mem) [24])
    o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i1] {s} p mem) [16])
    x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem)))
    && !config.BigEndian
    && i1 == i0+1
    && i2 == i0+2
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && o0.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1
    && mergePoint(b, x0, x1, x2) != nil
    && clobber(x0, x1, x2, s0, s1, o0)
    => @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 => load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Could be used to match subrules of a longer rule.
(OR <t> x0:(MOVBZload [i3] {s} p mem)
    o0:(OR <t> s0:(SL(W|D)const x1:(MOVBZload [i2] {s} p mem) [8])
    s1:(SL(W|D)const x2:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [16])))
    && !config.BigEndian
    && i2 == i0+2
    && i3 == i0+3
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && o0.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1
    && mergePoint(b, x0, x1, x2) != nil
    && clobber(x0, x1, x2, s0, s1, o0)
    => @mergePoint(b,x0,x1,x2) (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 => load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with computed address.
// Used to match longer rules.
(OR <t> s2:(SLDconst x2:(MOVBZload [i3] {s} p mem) [32])
    o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i2] {s} p mem) [40])
    s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [48])))
    && !config.BigEndian
    && i2 == i0+2
    && i3 == i0+3
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && o0.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
    && mergePoint(b, x0, x1, x2) != nil
    && clobber(x0, x1, x2, s0, s1, s2, o0)
    => @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])

// b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 32-bit Big endian order on Little endian arch
// Use byte-reverse indexed load for 4 bytes with constant address.
// Used to match longer rules.
(OR <t> s2:(SLDconst x2:(MOVBZload [i0] {s} p mem) [56])
    o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
    s0:(SLDconst x0:(MOVHBRload <t> (MOVDaddr <typ.Uintptr> [i2] {s} p) mem) [32])))
    && !config.BigEndian
    && i1 == i0+1
    && i2 == i0+2
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && o0.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1
    && mergePoint(b, x0, x1, x2) != nil
    && clobber(x0, x1, x2, s0, s1, s2, o0)
    => @mergePoint(b,x0,x1,x2) (SLDconst <t> (MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])

// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 => load 64-bit Little endian
// Rules with commutative ops and many operands will result in extremely large functions in rewritePPC64,
// so matching shorter previously defined subrules is important.
// Offset must be multiple of 4 for MOVD
(OR <t> s6:(SLDconst x7:(MOVBZload [i7] {s} p mem) [56])
    o5:(OR <t> s5:(SLDconst x6:(MOVBZload [i6] {s} p mem) [48])
    o4:(OR <t> s4:(SLDconst x5:(MOVBZload [i5] {s} p mem) [40])
    o3:(OR <t> s3:(SLDconst x4:(MOVBZload [i4] {s} p mem) [32])
    x0:(MOVWZload {s} [i0] p mem)))))
    && !config.BigEndian
    && i4 == i0+4
    && i5 == i0+5
    && i6 == i0+6
    && i7 == i0+7
    && x0.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
    && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
    && s3.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
    && mergePoint(b, x0, x4, x5, x6, x7) != nil
    && clobber(x0, x4, x5, x6, x7, s3, s4, s5, s6, o3, o4, o5)
    => @mergePoint(b,x0,x4,x5,x6,x7) (MOVDload <t> {s} [i0] p mem)

// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 => load 64-bit Big endian ordered bytes on Little endian arch
// Use byte-reverse indexed load of 8 bytes.
// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
// so matching shorter previously defined subrules is important.
(OR <t> s0:(SLDconst x0:(MOVBZload [i0] {s} p mem) [56])
    o0:(OR <t> s1:(SLDconst x1:(MOVBZload [i1] {s} p mem) [48])
    o1:(OR <t> s2:(SLDconst x2:(MOVBZload [i2] {s} p mem) [40])
    o2:(OR <t> s3:(SLDconst x3:(MOVBZload [i3] {s} p mem) [32])
    x4:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i4] p) mem)))))
    && !config.BigEndian
    && i1 == i0+1
    && i2 == i0+2
    && i3 == i0+3
    && i4 == i0+4
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
    && o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
    && s0.Uses == 1 && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
    && mergePoint(b, x0, x1, x2, x3, x4) != nil
    && clobber(x0, x1, x2, x3, x4, o0, o1, o2, s0, s1, s2, s3)
    => @mergePoint(b,x0,x1,x2,x3,x4) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] => load 64-bit Big endian ordered bytes on Little endian arch
// Use byte-reverse indexed load of 8 bytes.
// Rules with commutative ops and many operands can result in extremely large functions in rewritePPC64,
// so matching shorter previously defined subrules is important.
(OR <t> x7:(MOVBZload [i7] {s} p mem)
    o5:(OR <t> s6:(SLDconst x6:(MOVBZload [i6] {s} p mem) [8])
    o4:(OR <t> s5:(SLDconst x5:(MOVBZload [i5] {s} p mem) [16])
    o3:(OR <t> s4:(SLDconst x4:(MOVBZload [i4] {s} p mem) [24])
    s0:(SL(W|D)const x3:(MOVWBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem) [32])))))
    && !config.BigEndian
    && i4 == i0+4
    && i5 == i0+5
    && i6 == i0+6
    && i7 == i0+7
    && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
    && o3.Uses == 1 && o4.Uses == 1 && o5.Uses == 1
    && s0.Uses == 1 && s4.Uses == 1 && s5.Uses == 1 && s6.Uses == 1
    && mergePoint(b, x3, x4, x5, x6, x7) != nil
    && clobber(x3, x4, x5, x6, x7, o3, o4, o5, s0, s4, s5, s6)
    => @mergePoint(b,x3,x4,x5,x6,x7) (MOVDBRload <t> (MOVDaddr <typ.Uintptr> [i0] {s} p) mem)

// 2 byte store Little endian as in:
// b[0] = byte(v >> 16)
// b[1] = byte(v >> 24)
// Added for use in matching longer rules.
(MOVBstore [i1] {s} p (SR(W|D)const w [24])
    x0:(MOVBstore [i0] {s} p (SR(W|D)const w [16]) mem))
    && !config.BigEndian
    && x0.Uses == 1
    && i1 == i0+1
    && clobber(x0)
    => (MOVHstore [i0] {s} p (SRWconst <typ.UInt16> w [16]) mem)

// 2 byte store Little endian as in:
// b[0] = byte(v)
// b[1] = byte(v >> 8)
(MOVBstore [i1] {s} p (SR(W|D)const w [8])
    x0:(MOVBstore [i0] {s} p w mem))
    && !config.BigEndian
    && x0.Uses == 1
    && i1 == i0+1
    && clobber(x0)
    => (MOVHstore [i0] {s} p w mem)
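The store side of the same 2-byte little-endian pattern, as a Go sketch (hypothetical helper name, package clause omitted):

// Matches the rule above: byte stores of v and v>>8 at adjacent
// offsets merge into one MOVHstore.
func putUint16LE(b []byte, v uint16) {
    _ = b[1] // bounds check hint
    b[0] = byte(v)
    b[1] = byte(v >> 8)
}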

// 4 byte store Little endian as in:
// b[0:1] = uint16(v)
// b[2:3] = uint16(v >> 16)
(MOVHstore [i1] {s} p (SR(W|D)const w [16])
    x0:(MOVHstore [i0] {s} p w mem))
    && !config.BigEndian
    && x0.Uses == 1
    && i1 == i0+2
    && clobber(x0)
    => (MOVWstore [i0] {s} p w mem)

// 4 byte store Big endian as in:
// b[0] = byte(v >> 24)
// b[1] = byte(v >> 16)
// b[2] = byte(v >> 8)
// b[3] = byte(v)
// Use byte-reverse indexed 4 byte store.
(MOVBstore [i3] {s} p w
    x0:(MOVBstore [i2] {s} p (SRWconst w [8])
    x1:(MOVBstore [i1] {s} p (SRWconst w [16])
    x2:(MOVBstore [i0] {s} p (SRWconst w [24]) mem))))
    && !config.BigEndian
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
    && i1 == i0+1 && i2 == i0+2 && i3 == i0+3
    && clobber(x0, x1, x2)
    => (MOVWBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)

// The 2 byte store appears after the 4 byte store so that the
// match for the 2 byte store is not done first.
// If the 4 byte store is based on the 2 byte store then there are
// variations on the MOVDaddr subrule that would require additional
// rules to be written.

// 2 byte store Big endian as in:
// b[0] = byte(v >> 8)
// b[1] = byte(v)
(MOVBstore [i1] {s} p w x0:(MOVBstore [i0] {s} p (SRWconst w [8]) mem))
    && !config.BigEndian
    && x0.Uses == 1
    && i1 == i0+1
    && clobber(x0)
    => (MOVHBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)

// 8 byte store Little endian as in:
// b[0] = byte(v)
// b[1] = byte(v >> 8)
// b[2] = byte(v >> 16)
// b[3] = byte(v >> 24)
// b[4] = byte(v >> 32)
// b[5] = byte(v >> 40)
// b[6] = byte(v >> 48)
// b[7] = byte(v >> 56)
// Built on previously defined rules.
// Offset must be multiple of 4 for MOVDstore.
(MOVBstore [i7] {s} p (SRDconst w [56])
    x0:(MOVBstore [i6] {s} p (SRDconst w [48])
    x1:(MOVBstore [i5] {s} p (SRDconst w [40])
    x2:(MOVBstore [i4] {s} p (SRDconst w [32])
    x3:(MOVWstore [i0] {s} p w mem)))))
    && !config.BigEndian
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
    && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
    && clobber(x0, x1, x2, x3)
    => (MOVDstore [i0] {s} p w mem)

// 8 byte store Big endian as in:
// b[0] = byte(v >> 56)
// b[1] = byte(v >> 48)
// b[2] = byte(v >> 40)
// b[3] = byte(v >> 32)
// b[4] = byte(v >> 24)
// b[5] = byte(v >> 16)
// b[6] = byte(v >> 8)
// b[7] = byte(v)
// Use byte-reverse indexed 8 byte store.
(MOVBstore [i7] {s} p w
    x0:(MOVBstore [i6] {s} p (SRDconst w [8])
    x1:(MOVBstore [i5] {s} p (SRDconst w [16])
    x2:(MOVBstore [i4] {s} p (SRDconst w [24])
    x3:(MOVBstore [i3] {s} p (SRDconst w [32])
    x4:(MOVBstore [i2] {s} p (SRDconst w [40])
    x5:(MOVBstore [i1] {s} p (SRDconst w [48])
    x6:(MOVBstore [i0] {s} p (SRDconst w [56]) mem))))))))
    && !config.BigEndian
    && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1
    && i1 == i0+1 && i2 == i0+2 && i3 == i0+3 && i4 == i0+4 && i5 == i0+5 && i6 == i0+6 && i7 == i0+7
    && clobber(x0, x1, x2, x3, x4, x5, x6)
    => (MOVDBRstore (MOVDaddr <typ.Uintptr> [i0] {s} p) w mem)

// Arch-specific inlining for small or disjoint runtime.memmove
(SelectN [0] call:(CALLstatic {sym} s1:(MOVDstore _ (MOVDconst [sz]) s2:(MOVDstore _ src s3:(MOVDstore {t} _ dst mem)))))
    && sz >= 0
@@ -1287,3 +986,22 @@

// Use byte reverse instructions on Power10
(Bswap(16|32|64) x) && buildcfg.GOPPC64>=10 => (BR(H|W|D) x)

// Fold bit reversal into loads.
(BR(W|H) x:(MOV(W|H)Zload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
(BR(W|H) x:(MOV(W|H)Zloadidx ptr idx mem)) && x.Uses == 1 => @x.Block (MOV(W|H)Zreg (MOV(W|H)BRloadidx ptr idx mem))
(BRD x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
(BRD x:(MOVDloadidx ptr idx mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx ptr idx mem)

// Fold bit reversal into stores.
(MOV(D|W|H)store [off] {sym} ptr r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
(MOV(D|W|H)storeidx ptr idx r:(BR(D|W|H) val) mem) && r.Uses == 1 => (MOV(D|W|H)BRstoreidx ptr idx val mem)

// GOPPC64<10 rules.
// These Bswap operations should only be introduced by the memcombine pass in places where they can be folded into loads or stores.
(Bswap(32|16) x:(MOV(W|H)Zload [off] {sym} ptr mem)) => @x.Block (MOV(W|H)BRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
(Bswap(32|16) x:(MOV(W|H)Zloadidx ptr idx mem)) => @x.Block (MOV(W|H)Zreg (MOV(W|H)BRloadidx ptr idx mem))
(Bswap64 x:(MOVDload [off] {sym} ptr mem)) => @x.Block (MOVDBRload (MOVDaddr <ptr.Type> [off] {sym} ptr) mem)
(Bswap64 x:(MOVDloadidx ptr idx mem)) => @x.Block (MOVDBRloadidx ptr idx mem)
(MOV(D|W|H)store [off] {sym} ptr (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstore (MOVDaddr <ptr.Type> [off] {sym} ptr) val mem)
(MOV(D|W|H)storeidx ptr idx (Bswap(64|32|16) val) mem) => (MOV(D|W|H)BRstoreidx ptr idx val mem)
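For intuition, a sketch of the Go code these folds serve: on little-endian ppc64le, a big-endian read compiles to a load plus the byte swap that memcombine inserts, and the rules above merge the swap into a byte-reversed load (MOVWBRload). The helper name is illustrative; package clause omitted:

import "encoding/binary"

// The Bswap32 introduced by memcombine for this pattern folds
// into a single byte-reversed load via the rules above.
func readBE32(b []byte) uint32 {
    return binary.BigEndian.Uint32(b)
}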
@@ -331,9 +331,9 @@ func init() {
		// Load bytes in reverse endian order of the arch from arg0 into a 64 bit register, all zero extend.
		// The generated instructions are indexed loads with no offset field in the instruction so the aux fields are not used.
		// In these cases the index register field is set to 0 and the full address is in the base register.
		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", aux: "SymOff", typ: "Int64", faultOnNilArg0: true, symEffect: "Read"}, // load 8 bytes reverse order
		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", aux: "SymOff", typ: "Int32", faultOnNilArg0: true, symEffect: "Read"}, // load 4 bytes zero extend reverse order
		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", aux: "SymOff", typ: "Int16", faultOnNilArg0: true, symEffect: "Read"}, // load 2 bytes zero extend reverse order
		{name: "MOVDBRload", argLength: 2, reg: gpload, asm: "MOVDBR", typ: "UInt64", faultOnNilArg0: true}, // load 8 bytes reverse order
		{name: "MOVWBRload", argLength: 2, reg: gpload, asm: "MOVWBR", typ: "UInt32", faultOnNilArg0: true}, // load 4 bytes zero extend reverse order
		{name: "MOVHBRload", argLength: 2, reg: gpload, asm: "MOVHBR", typ: "UInt16", faultOnNilArg0: true}, // load 2 bytes zero extend reverse order

		// In these cases an index register is used in addition to a base register
		// Loads from memory location arg[0] + arg[1].

@@ -355,9 +355,9 @@ func init() {

		// Store bytes in the reverse endian order of the arch into arg0.
		// These are indexed stores with no offset field in the instruction so the auxint fields are not used.
		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes reverse order
		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes reverse order
		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", aux: "Sym", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes reverse order
		{name: "MOVDBRstore", argLength: 3, reg: gpstore, asm: "MOVDBR", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes reverse order
		{name: "MOVWBRstore", argLength: 3, reg: gpstore, asm: "MOVWBR", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes reverse order
		{name: "MOVHBRstore", argLength: 3, reg: gpstore, asm: "MOVHBR", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes reverse order

		// Floating point loads from arg0+aux+auxint
		{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load double float
@@ -1294,365 +1294,6 @@
    && clobber(g)
    => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)

// Combine constant stores into larger (unaligned) stores.
// Avoid SB because constant stores to relative offsets are
// emulated by the assembler and also can't handle unaligned offsets.
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && a.Off() + 1 == c.Off()
    && clobber(x)
    => (MOVHstoreconst [makeValAndOff(c.Val()&0xff | a.Val()<<8, a.Off())] {s} p mem)
(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && a.Off() + 2 == c.Off()
    && clobber(x)
    => (MOVWstore [a.Off()] {s} p (MOVDconst [int64(c.Val()&0xffff | a.Val()<<16)]) mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
    && p.Op != OpSB
    && x.Uses == 1
    && a.Off() + 4 == c.Off()
    && clobber(x)
    => (MOVDstore [a.Off()] {s} p (MOVDconst [c.Val64()&0xffffffff | a.Val64()<<32]) mem)

// Combine stores into larger (unaligned) stores.
// It doesn't work on global data (based on SB) because stores with relative addressing
// require that the memory operand be aligned.
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRDconst [8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHstore [i-1] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVWstore [i-2] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVWstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVDstore [i-4] {s} p w mem)
(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVDstore [i-4] {s} p w0 mem)

// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
// Store-with-bytes-reversed instructions do not support relative memory addresses,
// so these stores can't operate on global data (SB).
(MOVBstore [i] {s} p (SRDconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHBRstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
    && p.Op != OpSB
    && x.Uses == 1
    && clobber(x)
    => (MOVHBRstore [i-1] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVWBRstore [i-2] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVWBRstore [i-2] {s} p w0 mem)
(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVDBRstore [i-4] {s} p w mem)
(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
    && x.Uses == 1
    && clobber(x)
    => (MOVDBRstore [i-4] {s} p w0 mem)

(MOVBstore [7] {s} p1 (SRDconst w)
    x1:(MOVHBRstore [5] {s} p1 (SRDconst w)
    x2:(MOVWBRstore [1] {s} p1 (SRDconst w)
    x3:(MOVBstore [0] {s} p1 w mem))))
    && x1.Uses == 1
    && x2.Uses == 1
    && x3.Uses == 1
    && clobber(x1, x2, x3)
    => (MOVDBRstore {s} p1 w mem)

// Combining byte loads into larger (unaligned) loads.

// Big-endian loads

(ORW x1:(MOVBZload [i1] {s} p mem)
    sh:(SLWconst [8] x0:(MOVBZload [i0] {s} p mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)

(OR x1:(MOVBZload [i1] {s} p mem)
    sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
    && i1 == i0+1
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)

(ORW x1:(MOVHZload [i1] {s} p mem)
    sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)

(OR x1:(MOVHZload [i1] {s} p mem)
    sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
    && i1 == i0+2
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)

(OR x1:(MOVWZload [i1] {s} p mem)
    sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
    && i1 == i0+4
    && p.Op != OpSB
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)

(ORW
    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(ORW
    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
    y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
    or:(OR
    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
    y))
    && i1 == i0+1
    && j1 == j0-8
    && j1 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)

(OR
    s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
    or:(OR
    s1:(SLDconst [j1] x1:(MOVHZload [i1] {s} p mem))
    y))
    && i1 == i0+2
    && j1 == j0-16
    && j1 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)

// Little-endian loads

(ORW x0:(MOVBZload [i0] {s} p mem)
    sh:(SLWconst [8] x1:(MOVBZload [i1] {s} p mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))

(OR x0:(MOVBZload [i0] {s} p mem)
    sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
    && p.Op != OpSB
    && i1 == i0+1
    && x0.Uses == 1
    && x1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, sh)
    => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))

(ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
    sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, r0, r1, sh)
    => @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)

(OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
    sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
    && i1 == i0+2
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, r0, r1, sh)
    => @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))

(OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
    sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
    && i1 == i0+4
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && sh.Uses == 1
    && mergePoint(b,x0,x1) != nil
    && clobber(x0, x1, r0, r1, sh)
    => @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)

(ORW
    s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
    or:(ORW
    s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
    y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)

(OR
    s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
    or:(OR
    s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
    y))
    && p.Op != OpSB
    && i1 == i0+1
    && j1 == j0+8
    && j0 % 16 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)

(OR
    s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
    or:(OR
    s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem)))
    y))
    && i1 == i0+2
    && j1 == j0+16
    && j0 % 32 == 0
    && x0.Uses == 1
    && x1.Uses == 1
    && r0.Uses == 1
    && r1.Uses == 1
    && s0.Uses == 1
    && s1.Uses == 1
    && or.Uses == 1
    && mergePoint(b,x0,x1,y) != nil
    && clobber(x0, x1, r0, r1, s0, s1, or)
    => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)

// Combine stores into store multiples.
// 32-bit
(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))

@@ -1701,3 +1342,19 @@

// Convert 32-bit store multiples into 64-bit stores.
(STM2 [i] {s} p (SRDconst [32] x) x mem) => (MOVDstore [i] {s} p x mem)

// Fold bit reversal into loads.
(MOVWBR x:(MOVWZload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRload [off] {sym} ptr mem)) // need zero extension?
(MOVWBR x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVWZreg (MOVWBRloadidx [off] {sym} ptr idx mem)) // need zero extension?
(MOVDBR x:(MOVDload [off] {sym} ptr mem)) && x.Uses == 1 => @x.Block (MOVDBRload [off] {sym} ptr mem)
(MOVDBR x:(MOVDloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 => @x.Block (MOVDBRloadidx [off] {sym} ptr idx mem)

// Fold bit reversal into stores.
(MOV(D|W)store [off] {sym} ptr r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstore [off] {sym} ptr x mem)
(MOV(D|W)storeidx [off] {sym} ptr idx r:(MOV(D|W)BR x) mem) && r.Uses == 1 => (MOV(D|W)BRstoreidx [off] {sym} ptr idx x mem)

// Special bswap16 rules
(Bswap16 x:(MOVHZload [off] {sym} ptr mem)) => @x.Block (MOVHZreg (MOVHBRload [off] {sym} ptr mem))
(Bswap16 x:(MOVHZloadidx [off] {sym} ptr idx mem)) => @x.Block (MOVHZreg (MOVHBRloadidx [off] {sym} ptr idx mem))
(MOVHstore [off] {sym} ptr (Bswap16 val) mem) => (MOVHBRstore [off] {sym} ptr val mem)
(MOVHstoreidx [off] {sym} ptr idx (Bswap16 val) mem) => (MOVHBRstoreidx [off] {sym} ptr idx val mem)
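A sketch of what the bswap16 rules serve: s390x is big-endian, so a little-endian store compiles to a Bswap16 feeding MOVHstore, which the rules above turn into one MOVHBRstore. Helper name illustrative; package clause omitted:

import "encoding/binary"

// The Bswap16 introduced by memcombine folds into a single
// byte-reversed store via the rules above.
func putLE16(b []byte, v uint16) {
    binary.LittleEndian.PutUint16(b, v)
}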
@@ -482,6 +482,7 @@ var passes = [...]pass{
	{name: "branchelim", fn: branchelim},
	{name: "late fuse", fn: fuseLate},
	{name: "dse", fn: dse},
	{name: "memcombine", fn: memcombine},
	{name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
	{name: "insert resched checks", fn: insertLoopReschedChecks,
		disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.

@@ -580,6 +581,10 @@ var passOrder = [...]constraint{
	{"regalloc", "stackframe"},
	// trim needs regalloc to be done first.
	{"regalloc", "trim"},
	// memcombine works better if fuse happens first, to help merge stores.
	{"late fuse", "memcombine"},
	// memcombine is an arch-independent pass.
	{"memcombine", "lower"},
}

func init() {
@@ -6,6 +6,7 @@ package ssa

import (
	"cmd/compile/internal/abi"
	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
@@ -50,6 +51,10 @@ type Config struct {
	Race        bool // race detector enabled
	BigEndian   bool // target is big-endian
	UseFMA      bool // Use hardware FMA operation
	unalignedOK bool // Unaligned loads/stores are ok
	haveBswap64 bool // architecture implements Bswap64
	haveBswap32 bool // architecture implements Bswap32
	haveBswap16 bool // architecture implements Bswap16
}

type (
@@ -192,6 +197,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
		c.FPReg = framepointerRegAMD64
		c.LinkReg = linkRegAMD64
		c.hasGReg = true
		c.unalignedOK = true
		c.haveBswap64 = true
		c.haveBswap32 = true
		c.haveBswap16 = true
	case "386":
		c.PtrSize = 4
		c.RegSize = 4
@@ -204,6 +213,9 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
		c.FPReg = framepointerReg386
		c.LinkReg = linkReg386
		c.hasGReg = false
		c.unalignedOK = true
		c.haveBswap32 = true
		c.haveBswap16 = true
	case "arm":
		c.PtrSize = 4
		c.RegSize = 4
@@ -230,6 +242,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
		c.FPReg = framepointerRegARM64
		c.LinkReg = linkRegARM64
		c.hasGReg = true
		c.unalignedOK = true
		c.haveBswap64 = true
		c.haveBswap32 = true
		c.haveBswap16 = true
	case "ppc64":
		c.BigEndian = true
		fallthrough
@@ -249,6 +265,14 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
		c.FPReg = framepointerRegPPC64
		c.LinkReg = linkRegPPC64
		c.hasGReg = true
		c.unalignedOK = true
		// Note: ppc64 has register bswap ops only when GOPPC64>=10.
		// But it has bswap+load and bswap+store ops for all ppc64 variants.
		// That is the sense we're using them here - they are only used
		// in contexts where they can be merged with a load or store.
		c.haveBswap64 = true
		c.haveBswap32 = true
		c.haveBswap16 = true
	case "mips64":
		c.BigEndian = true
		fallthrough
@@ -288,6 +312,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
		c.hasGReg = true
		c.noDuffDevice = true
		c.BigEndian = true
		c.unalignedOK = true
		c.haveBswap64 = true
		c.haveBswap32 = true
		c.haveBswap16 = true // only for loads&stores, see ppc64 comment
	case "mips":
		c.BigEndian = true
		fallthrough
@@ -387,3 +415,17 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat bool) *Config {
}

func (c *Config) Ctxt() *obj.Link { return c.ctxt }

func (c *Config) haveByteSwap(size int64) bool {
	switch size {
	case 8:
		return c.haveBswap64
	case 4:
		return c.haveBswap32
	case 2:
		return c.haveBswap16
	default:
		base.Fatalf("bad size %d\n", size)
		return false
	}
}
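A minimal usage sketch of these capability bits, assuming `size` is the element width in bytes, `n` the combined width, and `needSwap` computed by comparing the observed byte order against `Config.BigEndian` (this mirrors the call sites in the new memcombine.go below):

	if needSwap && (size != 1 || !f.Config.haveByteSwap(n)) {
		return false // this target can't byte-swap the combined value
	}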
@@ -0,0 +1,737 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/base"
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"sort"
)

// memcombine combines smaller loads and stores into larger ones.
// We ensure this generates good code for encoding/binary operations.
// It may help other cases also.
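// For example (illustrative), a little-endian read such as
//
//	uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
//
// can become a single unaligned 4-byte load on targets that allow it.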
func memcombine(f *Func) {
	// This optimization requires that the architecture has
	// unaligned loads and unaligned stores.
	if !f.Config.unalignedOK {
		return
	}

	memcombineLoads(f)
	memcombineStores(f)
}

func memcombineLoads(f *Func) {
	// Find "OR trees" to start with.
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	// Mark all values that are the argument of an OR.
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op == OpOr16 || v.Op == OpOr32 || v.Op == OpOr64 {
				mark.add(v.Args[0].ID)
				mark.add(v.Args[1].ID)
			}
		}
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op != OpOr16 && v.Op != OpOr32 && v.Op != OpOr64 {
				continue
			}
			if mark.contains(v.ID) {
				// marked - means it is not the root of an OR tree
				continue
			}
			// Add the OR tree rooted at v to the order.
			// We use BFS here, but any walk that puts roots before leaves would work.
			i := len(order)
			order = append(order, v)
			for ; i < len(order); i++ {
				x := order[i]
				for j := 0; j < 2; j++ {
					a := x.Args[j]
					if a.Op == OpOr16 || a.Op == OpOr32 || a.Op == OpOr64 {
						order = append(order, a)
					}
				}
			}
		}
		for _, v := range order {
			max := f.Config.RegSize
			switch v.Op {
			case OpOr64:
			case OpOr32:
				max = 4
			case OpOr16:
				max = 2
			default:
				continue
			}
			for n := max; n > 1; n /= 2 {
				if combineLoads(v, n) {
					break
				}
			}
		}
	}
}

// A BaseAddress represents the address ptr+idx, where
// ptr is a pointer type and idx is an integer type.
// idx may be nil, in which case it is treated as 0.
type BaseAddress struct {
	ptr *Value
	idx *Value
}

// splitPtr returns the base address of ptr and any
// constant offset from that base.
// BaseAddress{ptr,nil},0 is always a valid result, but splitPtr
// tries to peel away as many constants into off as possible.
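// For example (illustrative), ptr = OffPtr[8](AddPtr p i) splits into
// BaseAddress{ptr: p, idx: i} with off = 8, so loads from p[i], p[i+1],
// ... all report the same base.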
func splitPtr(ptr *Value) (BaseAddress, int64) {
	var idx *Value
	var off int64
	for {
		if ptr.Op == OpOffPtr {
			off += ptr.AuxInt
			ptr = ptr.Args[0]
		} else if ptr.Op == OpAddPtr {
			if idx != nil {
				// We have two or more indexing values.
				// Pick the first one we found.
				return BaseAddress{ptr: ptr, idx: idx}, off
			}
			idx = ptr.Args[1]
			if idx.Op == OpAdd32 || idx.Op == OpAdd64 {
				if idx.Args[0].Op == OpConst32 || idx.Args[0].Op == OpConst64 {
					off += idx.Args[0].AuxInt
					idx = idx.Args[1]
				} else if idx.Args[1].Op == OpConst32 || idx.Args[1].Op == OpConst64 {
					off += idx.Args[1].AuxInt
					idx = idx.Args[0]
				}
			}
			ptr = ptr.Args[0]
		} else {
			return BaseAddress{ptr: ptr, idx: idx}, off
		}
	}
}

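// combineLoads tries to combine the n loads ORed together under root
// into a single wide load. Illustratively, with byte loads:
//
//	b0 | b1<<8 | ... | b7<<56  ->  one 8-byte load (plus swap/shift if needed)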
func combineLoads(root *Value, n int64) bool {
	orOp := root.Op
	var shiftOp Op
	switch orOp {
	case OpOr64:
		shiftOp = OpLsh64x64
	case OpOr32:
		shiftOp = OpLsh32x64
	case OpOr16:
		shiftOp = OpLsh16x64
	default:
		return false
	}

	// Find n values that are ORed together with the above op.
	a := make([]*Value, 0, 8)
	v := root
	for int64(len(a)) < n {
		if v.Args[0].Op == orOp {
			a = append(a, v.Args[1])
			v = v.Args[0]
		} else if v.Args[1].Op == orOp {
			a = append(a, v.Args[0])
			v = v.Args[1]
		} else if int64(len(a)) == n-2 {
			a = append(a, v.Args[0])
			a = append(a, v.Args[1])
			v = nil
		} else {
			return false
		}
	}
	tail := v // Value to OR in beyond the ones we're working with (or nil if none).

	// Check the first entry to see what ops we're looking for.
	// All the entries should be of the form shift(extend(load)), maybe with no shift.
	v = a[0]
	if v.Op == shiftOp {
		v = v.Args[0]
	}
	var extOp Op
	if orOp == OpOr64 && (v.Op == OpZeroExt8to64 || v.Op == OpZeroExt16to64 || v.Op == OpZeroExt32to64) ||
		orOp == OpOr32 && (v.Op == OpZeroExt8to32 || v.Op == OpZeroExt16to32) ||
		orOp == OpOr16 && v.Op == OpZeroExt8to16 {
		extOp = v.Op
		v = v.Args[0]
	} else {
		return false
	}
	if v.Op != OpLoad {
		return false
	}
	base, _ := splitPtr(v.Args[0])
	mem := v.Args[1]
	size := v.Type.Size()

	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if base.ptr.Op == OpAddr {
			return false
		}
	}

	// Check all the entries, extract useful info.
	type LoadRecord struct {
		load   *Value
		offset int64 // offset of load address from base
		shift  int64
	}
	r := make([]LoadRecord, n, 8)
	for i := int64(0); i < n; i++ {
		v := a[i]
		if v.Uses != 1 {
			return false
		}
		shift := int64(0)
		if v.Op == shiftOp {
			if v.Args[1].Op != OpConst64 {
				return false
			}
			shift = v.Args[1].AuxInt
			v = v.Args[0]
			if v.Uses != 1 {
				return false
			}
		}
		if v.Op != extOp {
			return false
		}
		load := v.Args[0]
		if load.Op != OpLoad {
			return false
		}
		if load.Uses != 1 {
			return false
		}
		if load.Args[1] != mem {
			return false
		}
		p, off := splitPtr(load.Args[0])
		if p != base {
			return false
		}
		r[i] = LoadRecord{load: load, offset: off, shift: shift}
	}

	// Sort in memory address order.
	sort.Slice(r, func(i, j int) bool {
		return r[i].offset < r[j].offset
	})

	// Check that we have contiguous offsets.
	for i := int64(0); i < n; i++ {
		if r[i].offset != r[0].offset+i*size {
			return false
		}
	}

	// Check for reads in little-endian or big-endian order.
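	// Illustratively, byte loads combined as b0 | b1<<8 | b2<<16 | b3<<24
	// record shifts 0, 8, 16, 24: shift[i] == shift0 + i*size*8 is the
	// little-endian signature, shift[i] == shift0 - i*size*8 the big-endian one.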
	shift0 := r[0].shift
	isLittleEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0+i*size*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	for i := int64(0); i < n; i++ {
		if r[i].shift != shift0-i*size*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return false
	}

	// Find a place to put the new load.
	// This is tricky, because it has to be at a point where
	// its memory argument is live. We can't just put it in root.Block.
	// We use the block of the latest load.
	loads := make([]*Value, n, 8)
	for i := int64(0); i < n; i++ {
		loads[i] = r[i].load
	}
	loadBlock := mergePoint(root.Block, loads...)
	if loadBlock == nil {
		return false
	}
	// Find a source position to use.
	pos := src.NoXPos
	for _, load := range loads {
		if load.Block == loadBlock {
			pos = load.Pos
			break
		}
	}
	if pos == src.NoXPos {
		return false
	}

	// Check to see if we need byte swap before storing.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
		return false
	}

	// This is the commit point.

	// First, issue load at lowest address.
	v = loadBlock.NewValue2(pos, OpLoad, sizeType(n*size), r[0].load.Args[0], mem)

	// Byte swap if needed.
	if needSwap {
		v = byteSwap(loadBlock, pos, v)
	}

	// Extend if needed.
	if n*size < root.Type.Size() {
		v = zeroExtend(loadBlock, pos, v, n*size, root.Type.Size())
	}

	// Shift if needed.
	if isLittleEndian && shift0 != 0 {
		v = leftShift(loadBlock, pos, v, shift0)
	}
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		v = leftShift(loadBlock, pos, v, shift0-(n-1)*size*8)
	}

	// Install. If there's a tail, make the root (OR v tail).
	// If not, do (Copy v).
	if tail != nil {
		root.SetArg(0, v)
		root.SetArg(1, tail)
	} else {
		root.reset(OpCopy)
		root.AddArg(v)
	}

	// Clobber the loads, just to prevent additional work being done on
	// subtrees (which are now unreachable).
	for i := int64(0); i < n; i++ {
		clobber(r[i].load)
	}
	return true
}

func memcombineStores(f *Func) {
	mark := f.newSparseSet(f.NumValues())
	defer f.retSparseSet(mark)
	var order []*Value

	for _, b := range f.Blocks {
		// Mark all stores which are not last in a store sequence.
		mark.clear()
		for _, v := range b.Values {
			if v.Op == OpStore {
				mark.add(v.MemoryArg().ID)
			}
		}

		// Pick an order for visiting stores such that
		// later stores come earlier in the ordering.
		order = order[:0]
		for _, v := range b.Values {
			if v.Op != OpStore {
				continue
			}
			if mark.contains(v.ID) {
				continue // not last in a chain of stores
			}
			for {
				order = append(order, v)
				v = v.Args[2]
				if v.Block != b || v.Op != OpStore {
					break
				}
			}
		}

		// Look for combining opportunities at each store in queue order.
		for _, v := range order {
			if v.Op != OpStore { // already rewritten
				continue
			}

			size := v.Aux.(*types.Type).Size()
			if size >= f.Config.RegSize || size == 0 {
				continue
			}

			for n := f.Config.RegSize / size; n > 1; n /= 2 {
				if combineStores(v, n) {
					continue
				}
			}
		}
	}
}

// Try to combine the n stores ending in root.
// Returns true if successful.
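// Illustratively, a byte-by-byte write such as
//
//	p[0] = byte(x)
//	p[1] = byte(x >> 8)
//	p[2] = byte(x >> 16)
//	p[3] = byte(x >> 24)
//
// can be rewritten into a single 4-byte store of x.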
func combineStores(root *Value, n int64) bool {
	// Helper functions.
	type StoreRecord struct {
		store  *Value
		offset int64
	}
	getShiftBase := func(a []StoreRecord) *Value {
		x := a[0].store.Args[1]
		y := a[1].store.Args[1]
		switch x.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			x = x.Args[0]
		default:
			return nil
		}
		switch y.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			y = y.Args[0]
		default:
			return nil
		}
		var x2 *Value
		switch x.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			x2 = x.Args[0]
		default:
		}
		var y2 *Value
		switch y.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			y2 = y.Args[0]
		default:
		}
		if y2 == x {
			// a shift of x and x itself.
			return x
		}
		if x2 == y {
			// a shift of y and y itself.
			return y
		}
		if x2 == y2 {
			// 2 shifts both of the same argument.
			return x2
		}
		return nil
	}
	isShiftBase := func(v, base *Value) bool {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return false
		}
		if val == base {
			return true
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[0]
		default:
			return false
		}
		return val == base
	}
	shift := func(v, base *Value) int64 {
		val := v.Args[1]
		switch val.Op {
		case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
			val = val.Args[0]
		default:
			return -1
		}
		if val == base {
			return 0
		}
		switch val.Op {
		case OpRsh64Ux64, OpRsh32Ux64, OpRsh16Ux64:
			val = val.Args[1]
		default:
			return -1
		}
		if val.Op != OpConst64 {
			return -1
		}
		return val.AuxInt
	}

	// Element size of the individual stores.
	size := root.Aux.(*types.Type).Size()
	if size*n > root.Block.Func.Config.RegSize {
		return false
	}

	// Gather n stores to look at. Check easy conditions we require.
	a := make([]StoreRecord, 0, 8)
	rbase, roff := splitPtr(root.Args[0])
	if root.Block.Func.Config.arch == "S390X" {
		// s390x can't handle unaligned accesses to global variables.
		if rbase.ptr.Op == OpAddr {
			return false
		}
	}
	a = append(a, StoreRecord{root, roff})
	for i, x := int64(1), root.Args[2]; i < n; i, x = i+1, x.Args[2] {
		if x.Op != OpStore {
			return false
		}
		if x.Block != root.Block {
			return false
		}
		if x.Uses != 1 { // Note: root can have more than one use.
			return false
		}
		if x.Aux.(*types.Type).Size() != size {
			return false
		}
		base, off := splitPtr(x.Args[0])
		if base != rbase {
			return false
		}
		a = append(a, StoreRecord{x, off})
	}
	// Before we sort, grab the memory arg the result should have.
	mem := a[n-1].store.Args[2]

	// Sort stores in increasing address order.
	sort.Slice(a, func(i, j int) bool {
		return a[i].offset < a[j].offset
	})

	// Check that everything is written to sequential locations.
	for i := int64(0); i < n; i++ {
		if a[i].offset != a[0].offset+i*size {
			return false
		}
	}

	// Memory location we're going to write at (the lowest one).
	ptr := a[0].store.Args[0]

	// Check for constant stores.
	isConst := true
	for i := int64(0); i < n; i++ {
		switch a[i].store.Args[1].Op {
		case OpConst32, OpConst16, OpConst8:
		default:
			isConst = false
		}
		if !isConst {
			break
		}
	}
	if isConst {
		// Modify root to do all the stores.
		var c int64
		mask := int64(1)<<(8*size) - 1
		for i := int64(0); i < n; i++ {
			s := 8 * size * int64(i)
			if root.Block.Func.Config.BigEndian {
				s = 8*size*(n-1) - s
			}
			c |= (a[i].store.Args[1].AuxInt & mask) << s
		}
		var cv *Value
		switch size * n {
		case 2:
			cv = root.Block.Func.ConstInt16(types.Types[types.TUINT16], int16(c))
		case 4:
			cv = root.Block.Func.ConstInt32(types.Types[types.TUINT32], int32(c))
		case 8:
			cv = root.Block.Func.ConstInt64(types.Types[types.TUINT64], c)
		}

		// Move all the stores to the root.
		for i := int64(0); i < n; i++ {
			v := a[i].store
			if v == root {
				v.Aux = cv.Type // widen store type
				v.SetArg(0, ptr)
				v.SetArg(1, cv)
				v.SetArg(2, mem)
			} else {
				clobber(v)
				v.Type = types.Types[types.TBOOL] // erase memory type
			}
		}
		return true
	}

	// Check that all the shift/trunc are of the same base value.
	shiftBase := getShiftBase(a)
	if shiftBase == nil {
		return false
	}
	for i := int64(0); i < n; i++ {
		if !isShiftBase(a[i].store, shiftBase) {
			return false
		}
	}

	// Check for writes in little-endian or big-endian order.
	isLittleEndian := true
	shift0 := shift(a[0].store, shiftBase)
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0+i*size*8 {
			isLittleEndian = false
			break
		}
	}
	isBigEndian := true
	for i := int64(1); i < n; i++ {
		if shift(a[i].store, shiftBase) != shift0-i*size*8 {
			isBigEndian = false
			break
		}
	}
	if !isLittleEndian && !isBigEndian {
		return false
	}

	// Check to see if we need byte swap before storing.
	needSwap := isLittleEndian && root.Block.Func.Config.BigEndian ||
		isBigEndian && !root.Block.Func.Config.BigEndian
	if needSwap && (size != 1 || !root.Block.Func.Config.haveByteSwap(n)) {
		return false
	}

	// This is the commit point.

	// Modify root to do all the stores.
	sv := shiftBase
	if isLittleEndian && shift0 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0)
	}
	if isBigEndian && shift0-(n-1)*size*8 != 0 {
		sv = rightShift(root.Block, root.Pos, sv, shift0-(n-1)*size*8)
	}
	if sv.Type.Size() > size*n {
		sv = truncate(root.Block, root.Pos, sv, sv.Type.Size(), size*n)
	}
	if needSwap {
		sv = byteSwap(root.Block, root.Pos, sv)
	}

	// Move all the stores to the root.
	for i := int64(0); i < n; i++ {
		v := a[i].store
		if v == root {
			v.Aux = sv.Type // widen store type
			v.SetArg(0, ptr)
			v.SetArg(1, sv)
			v.SetArg(2, mem)
		} else {
			clobber(v)
			v.Type = types.Types[types.TBOOL] // erase memory type
		}
	}
	return true
}

func sizeType(size int64) *types.Type {
	switch size {
	case 8:
		return types.Types[types.TUINT64]
	case 4:
		return types.Types[types.TUINT32]
	case 2:
		return types.Types[types.TUINT16]
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

func truncate(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 82:
		return b.NewValue1(pos, OpTrunc64to16, types.Types[types.TUINT16], v)
	case 84:
		return b.NewValue1(pos, OpTrunc64to32, types.Types[types.TUINT32], v)
	case 42:
		return b.NewValue1(pos, OpTrunc32to16, types.Types[types.TUINT16], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}

func zeroExtend(b *Block, pos src.XPos, v *Value, from, to int64) *Value {
	switch from*10 + to {
	case 24:
		return b.NewValue1(pos, OpZeroExt16to32, types.Types[types.TUINT32], v)
	case 28:
		return b.NewValue1(pos, OpZeroExt16to64, types.Types[types.TUINT64], v)
	case 48:
		return b.NewValue1(pos, OpZeroExt32to64, types.Types[types.TUINT64], v)
	default:
		base.Fatalf("bad sizes %d %d\n", from, to)
		return nil
	}
}

func leftShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpLsh64x64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpLsh32x64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpLsh16x64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

func rightShift(b *Block, pos src.XPos, v *Value, shift int64) *Value {
	s := b.Func.ConstInt64(types.Types[types.TUINT64], shift)
	size := v.Type.Size()
	switch size {
	case 8:
		return b.NewValue2(pos, OpRsh64Ux64, v.Type, v, s)
	case 4:
		return b.NewValue2(pos, OpRsh32Ux64, v.Type, v, s)
	case 2:
		return b.NewValue2(pos, OpRsh16Ux64, v.Type, v, s)
	default:
		base.Fatalf("bad size %d\n", size)
		return nil
	}
}

func byteSwap(b *Block, pos src.XPos, v *Value) *Value {
	switch v.Type.Size() {
	case 8:
		return b.NewValue1(pos, OpBswap64, v.Type, v)
	case 4:
		return b.NewValue1(pos, OpBswap32, v.Type, v)
	case 2:
		return b.NewValue1(pos, OpBswap16, v.Type, v)
	default:
		v.Fatalf("bad size %d\n", v.Type.Size())
		return nil
	}
}
@@ -29470,10 +29470,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVDBRload",
		auxType:        auxSymOff,
		argLen:         2,
		faultOnNilArg0: true,
		symEffect:      SymRead,
		asm:            ppc64.AMOVDBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -29486,10 +29484,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVWBRload",
		auxType:        auxSymOff,
		argLen:         2,
		faultOnNilArg0: true,
		symEffect:      SymRead,
		asm:            ppc64.AMOVWBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -29502,10 +29498,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVHBRload",
		auxType:        auxSymOff,
		argLen:         2,
		faultOnNilArg0: true,
		symEffect:      SymRead,
		asm:            ppc64.AMOVHBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -29684,10 +29678,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVDBRstore",
		auxType:        auxSym,
		argLen:         3,
		faultOnNilArg0: true,
		symEffect:      SymWrite,
		asm:            ppc64.AMOVDBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -29698,10 +29690,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVWBRstore",
		auxType:        auxSym,
		argLen:         3,
		faultOnNilArg0: true,
		symEffect:      SymWrite,
		asm:            ppc64.AMOVWBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -29712,10 +29702,8 @@ var opcodeTable = [...]opInfo{
	},
	{
		name:           "MOVHBRstore",
		auxType:        auxSym,
		argLen:         3,
		faultOnNilArg0: true,
		symEffect:      SymWrite,
		asm:            ppc64.AMOVHBR,
		reg: regInfo{
			inputs: []inputInfo{
@@ -278,6 +278,8 @@ func rewriteValue386(v *Value) bool {
	case OpAvg32u:
		v.Op = Op386AVGLU
		return true
	case OpBswap16:
		return rewriteValue386_OpBswap16(v)
	case OpBswap32:
		v.Op = Op386BSWAPL
		return true
@@ -3715,266 +3717,6 @@ func rewriteValue386_Op386MOVBstore(v *Value) bool {
		v.AddArg3(base, val, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVWstore [i-1] {s} p w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		if v_1.Op != Op386SHRWconst || auxIntToInt16(v_1.AuxInt) != 8 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i - 1)
		v.Aux = symToAux(s)
		v.AddArg3(p, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVWstore [i-1] {s} p w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 8 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i - 1)
		v.Aux = symToAux(s)
		v.AddArg3(p, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRWconst [8] w) mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVWstore [i] {s} p w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		w := v_1
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] {
			break
		}
		x_1 := x.Args[1]
		if x_1.Op != Op386SHRWconst || auxIntToInt16(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p w x:(MOVBstore {s} [i+1] p (SHRLconst [8] w) mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVWstore [i] {s} p w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		w := v_1
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i+1 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] {
			break
		}
		x_1 := x.Args[1]
		if x_1.Op != Op386SHRLconst || auxIntToInt32(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVWstore [i-1] {s} p w0 mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		if v_1.Op != Op386SHRLconst {
			break
		}
		j := auxIntToInt32(v_1.AuxInt)
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i-1 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] {
			break
		}
		w0 := x.Args[1]
		if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i - 1)
		v.Aux = symToAux(s)
		v.AddArg3(p, w0, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstore [i] {s} p0 w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		if v_1.Op != Op386SHRWconst || auxIntToInt16(v_1.AuxInt) != 8 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p0 := x.Args[0]
		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstore [i] {s} p0 w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 8 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p0 := x.Args[0]
		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRWconst [8] w) mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstore [i] {s} p0 w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p0 := v_0
		w := v_1
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p1 := x.Args[0]
		x_1 := x.Args[1]
		if x_1.Op != Op386SHRWconst || auxIntToInt16(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p0 w x:(MOVBstore {s} [i] p1 (SHRLconst [8] w) mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstore [i] {s} p0 w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p0 := v_0
		w := v_1
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p1 := x.Args[0]
		x_1 := x.Args[1]
		if x_1.Op != Op386SHRLconst || auxIntToInt32(x_1.AuxInt) != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w, mem)
		return true
	}
	// match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstore [i] {s} p0 w0 mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		if v_1.Op != Op386SHRLconst {
			break
		}
		j := auxIntToInt32(v_1.AuxInt)
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVBstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p0 := x.Args[0]
		w0 := x.Args[1]
		if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w0, mem)
		return true
	}
	return false
}
func rewriteValue386_Op386MOVBstoreconst(v *Value) bool {
@@ -4025,108 +3767,6 @@ func rewriteValue386_Op386MOVBstoreconst(v *Value) bool {
		v.AddArg2(ptr, mem)
		return true
	}
	// match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
	// cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
	for {
		c := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		x := v_1
		if x.Op != Op386MOVBstoreconst {
			break
		}
		a := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p, mem)
		return true
	}
	// match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
	// cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
	for {
		a := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		x := v_1
		if x.Op != Op386MOVBstoreconst {
			break
		}
		c := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p, mem)
		return true
	}
	// match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
	// cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
	for {
		c := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		x := v_1
		if x.Op != Op386MOVBstoreconst {
			break
		}
		a := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		p0 := x.Args[0]
		if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p0, mem)
		return true
	}
	// match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
	// cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)
	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
	for {
		a := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p0 := v_0
		x := v_1
		if x.Op != Op386MOVBstoreconst {
			break
		}
		c := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		p1 := x.Args[0]
		if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 1) && clobber(x)) {
			break
		}
		v.reset(Op386MOVWstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p0, mem)
		return true
	}
	return false
}
func rewriteValue386_Op386MOVLload(v *Value) bool {
@@ -5258,115 +4898,6 @@ func rewriteValue386_Op386MOVWstore(v *Value) bool {
		v.AddArg3(base, val, mem)
		return true
	}
	// match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVLstore [i-2] {s} p w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 16 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstore)
		v.AuxInt = int32ToAuxInt(i - 2)
		v.Aux = symToAux(s)
		v.AddArg3(p, w, mem)
		return true
	}
	// match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
	// cond: x.Uses == 1 && clobber(x)
	// result: (MOVLstore [i-2] {s} p w0 mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		if v_1.Op != Op386SHRLconst {
			break
		}
		j := auxIntToInt32(v_1.AuxInt)
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i-2 || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		if p != x.Args[0] {
			break
		}
		w0 := x.Args[1]
		if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstore)
		v.AuxInt = int32ToAuxInt(i - 2)
		v.Aux = symToAux(s)
		v.AddArg3(p, w0, mem)
		return true
	}
	// match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
	// result: (MOVLstore [i] {s} p0 w mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		if v_1.Op != Op386SHRLconst || auxIntToInt32(v_1.AuxInt) != 16 {
			break
		}
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p0 := x.Args[0]
		if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w, mem)
		return true
	}
	// match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
	// result: (MOVLstore [i] {s} p0 w0 mem)
	for {
		i := auxIntToInt32(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		if v_1.Op != Op386SHRLconst {
			break
		}
		j := auxIntToInt32(v_1.AuxInt)
		w := v_1.Args[0]
		x := v_2
		if x.Op != Op386MOVWstore || auxIntToInt32(x.AuxInt) != i || auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[2]
		p0 := x.Args[0]
		w0 := x.Args[1]
		if w0.Op != Op386SHRLconst || auxIntToInt32(w0.AuxInt) != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstore)
		v.AuxInt = int32ToAuxInt(i)
		v.Aux = symToAux(s)
		v.AddArg3(p0, w0, mem)
		return true
	}
	return false
}
func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
@@ -5417,108 +4948,6 @@ func rewriteValue386_Op386MOVWstoreconst(v *Value) bool {
		v.AddArg2(ptr, mem)
		return true
	}
	// match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
	// cond: x.Uses == 1 && a.Off() + 2 == c.Off() && clobber(x)
	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
	for {
		c := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		x := v_1
		if x.Op != Op386MOVWstoreconst {
			break
		}
		a := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+2 == c.Off() && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p, mem)
		return true
	}
	// match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
	// cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
	for {
		a := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p := v_0
		x := v_1
		if x.Op != Op386MOVWstoreconst {
			break
		}
		c := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p, mem)
		return true
	}
	// match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
	// cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
	for {
		c := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p1 := v_0
		x := v_1
		if x.Op != Op386MOVWstoreconst {
			break
		}
		a := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		p0 := x.Args[0]
		if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p0, mem)
		return true
	}
	// match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
	// cond: x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)
	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
	for {
		a := auxIntToValAndOff(v.AuxInt)
		s := auxToSym(v.Aux)
		p0 := v_0
		x := v_1
		if x.Op != Op386MOVWstoreconst {
			break
		}
		c := auxIntToValAndOff(x.AuxInt)
		if auxToSym(x.Aux) != s {
			break
		}
		mem := x.Args[1]
		p1 := x.Args[0]
		if !(x.Uses == 1 && a.Off() == c.Off() && sequentialAddresses(p0, p1, 2) && clobber(x)) {
			break
		}
		v.reset(Op386MOVLstoreconst)
		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
		v.Aux = symToAux(s)
		v.AddArg2(p0, mem)
		return true
	}
	return false
}
func rewriteValue386_Op386MULL(v *Value) bool {
@@ -6237,8 +5666,6 @@ func rewriteValue386_Op386NOTL(v *Value) bool {
func rewriteValue386_Op386ORL(v *Value) bool {
	v_1 := v.Args[1]
	v_0 := v.Args[0]
	b := v.Block
	typ := &b.Func.Config.Types
	// match: (ORL x (MOVLconst [c]))
	// result: (ORLconst [c] x)
	for {
@@ -6290,203 +5717,6 @@ func rewriteValue386_Op386ORL(v *Value) bool {
		v.copyOf(x)
		return true
	}
	// match: (ORL x0:(MOVBload [i0] {s} p mem) s0:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
	// result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
	for {
		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
			x0 := v_0
			if x0.Op != Op386MOVBload {
				continue
			}
			i0 := auxIntToInt32(x0.AuxInt)
			s := auxToSym(x0.Aux)
			mem := x0.Args[1]
			p := x0.Args[0]
			s0 := v_1
			if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 8 {
				continue
			}
			x1 := s0.Args[0]
			if x1.Op != Op386MOVBload {
				continue
			}
			i1 := auxIntToInt32(x1.AuxInt)
			if auxToSym(x1.Aux) != s {
				continue
			}
			_ = x1.Args[1]
			if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
				continue
			}
			b = mergePoint(b, x0, x1)
			v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16)
			v.copyOf(v0)
			v0.AuxInt = int32ToAuxInt(i0)
			v0.Aux = symToAux(s)
			v0.AddArg2(p, mem)
			return true
		}
		break
	}
	// match: (ORL x0:(MOVBload [i] {s} p0 mem) s0:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
	// cond: x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, s0)
	// result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
	for {
		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
			x0 := v_0
			if x0.Op != Op386MOVBload {
				continue
			}
			i := auxIntToInt32(x0.AuxInt)
			s := auxToSym(x0.Aux)
			mem := x0.Args[1]
			p0 := x0.Args[0]
			s0 := v_1
			if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 8 {
				continue
			}
			x1 := s0.Args[0]
			if x1.Op != Op386MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
				continue
			}
			_ = x1.Args[1]
			p1 := x1.Args[0]
			if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, s0)) {
				continue
			}
			b = mergePoint(b, x0, x1)
			v0 := b.NewValue0(x1.Pos, Op386MOVWload, typ.UInt16)
			v.copyOf(v0)
			v0.AuxInt = int32ToAuxInt(i)
			v0.Aux = symToAux(s)
			v0.AddArg2(p0, mem)
			return true
		}
		break
	}
	// match: (ORL o0:(ORL x0:(MOVWload [i0] {s} p mem) s0:(SHLLconst [16] x1:(MOVBload [i2] {s} p mem))) s1:(SHLLconst [24] x2:(MOVBload [i3] {s} p mem)))
	// cond: i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i0] {s} p mem)
	for {
		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
			o0 := v_0
			if o0.Op != Op386ORL {
				continue
			}
			_ = o0.Args[1]
			o0_0 := o0.Args[0]
			o0_1 := o0.Args[1]
			for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
				x0 := o0_0
				if x0.Op != Op386MOVWload {
					continue
				}
				i0 := auxIntToInt32(x0.AuxInt)
				s := auxToSym(x0.Aux)
				mem := x0.Args[1]
				p := x0.Args[0]
				s0 := o0_1
				if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 16 {
					continue
				}
				x1 := s0.Args[0]
				if x1.Op != Op386MOVBload {
					continue
				}
				i2 := auxIntToInt32(x1.AuxInt)
				if auxToSym(x1.Aux) != s {
					continue
				}
				_ = x1.Args[1]
				if p != x1.Args[0] || mem != x1.Args[1] {
					continue
				}
				s1 := v_1
				if s1.Op != Op386SHLLconst || auxIntToInt32(s1.AuxInt) != 24 {
					continue
				}
				x2 := s1.Args[0]
				if x2.Op != Op386MOVBload {
					continue
				}
				i3 := auxIntToInt32(x2.AuxInt)
				if auxToSym(x2.Aux) != s {
					continue
				}
				_ = x2.Args[1]
				if p != x2.Args[0] || mem != x2.Args[1] || !(i2 == i0+2 && i3 == i0+3 && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
					continue
				}
				b = mergePoint(b, x0, x1, x2)
				v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32)
				v.copyOf(v0)
				v0.AuxInt = int32ToAuxInt(i0)
				v0.Aux = symToAux(s)
				v0.AddArg2(p, mem)
				return true
			}
		}
		break
	}
	// match: (ORL o0:(ORL x0:(MOVWload [i] {s} p0 mem) s0:(SHLLconst [16] x1:(MOVBload [i] {s} p1 mem))) s1:(SHLLconst [24] x2:(MOVBload [i] {s} p2 mem)))
	// cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b,x0,x1,x2) != nil && clobber(x0, x1, x2, s0, s1, o0)
	// result: @mergePoint(b,x0,x1,x2) (MOVLload [i] {s} p0 mem)
	for {
		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
			o0 := v_0
			if o0.Op != Op386ORL {
				continue
			}
			_ = o0.Args[1]
			o0_0 := o0.Args[0]
			o0_1 := o0.Args[1]
			for _i1 := 0; _i1 <= 1; _i1, o0_0, o0_1 = _i1+1, o0_1, o0_0 {
				x0 := o0_0
				if x0.Op != Op386MOVWload {
					continue
				}
				i := auxIntToInt32(x0.AuxInt)
				s := auxToSym(x0.Aux)
				mem := x0.Args[1]
				p0 := x0.Args[0]
				s0 := o0_1
				if s0.Op != Op386SHLLconst || auxIntToInt32(s0.AuxInt) != 16 {
					continue
				}
				x1 := s0.Args[0]
				if x1.Op != Op386MOVBload || auxIntToInt32(x1.AuxInt) != i || auxToSym(x1.Aux) != s {
					continue
				}
				_ = x1.Args[1]
				p1 := x1.Args[0]
				if mem != x1.Args[1] {
					continue
				}
				s1 := v_1
				if s1.Op != Op386SHLLconst || auxIntToInt32(s1.AuxInt) != 24 {
					continue
				}
				x2 := s1.Args[0]
				if x2.Op != Op386MOVBload || auxIntToInt32(x2.AuxInt) != i || auxToSym(x2.Aux) != s {
					continue
				}
				_ = x2.Args[1]
				p2 := x2.Args[0]
				if mem != x2.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && o0.Uses == 1 && sequentialAddresses(p0, p1, 2) && sequentialAddresses(p1, p2, 1) && mergePoint(b, x0, x1, x2) != nil && clobber(x0, x1, x2, s0, s1, o0)) {
					continue
				}
				b = mergePoint(b, x0, x1, x2)
				v0 := b.NewValue0(x2.Pos, Op386MOVLload, typ.UInt32)
				v.copyOf(v0)
				v0.AuxInt = int32ToAuxInt(i)
				v0.Aux = symToAux(s)
				v0.AddArg2(p0, mem)
				return true
			}
		}
		break
	}
	return false
}
func rewriteValue386_Op386ORLconst(v *Value) bool {
@@ -8483,6 +7713,18 @@ func rewriteValue386_OpAddr(v *Value) bool {
		return true
	}
}
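// Note (illustrative): rotating a 16-bit value by 8 swaps its two
// bytes, e.g. 0x1122 -> 0x2211, so ROLWconst [8] implements Bswap16.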
func rewriteValue386_OpBswap16(v *Value) bool {
	v_0 := v.Args[0]
	// match: (Bswap16 x)
	// result: (ROLWconst [8] x)
	for {
		x := v_0
		v.reset(Op386ROLWconst)
		v.AuxInt = int16ToAuxInt(8)
		v.AddArg(x)
		return true
	}
}
func rewriteValue386_OpConst16(v *Value) bool {
	// match: (Const16 [c])
	// result: (MOVLconst [int32(c)])
(Four more generated rewrite-file diffs are suppressed here because they are too large to display.)
@@ -84,6 +84,13 @@ func (v *Value) AuxInt8() int8 {
	return int8(v.AuxInt)
}

func (v *Value) AuxUInt8() uint8 {
	if opcodeTable[v.Op].auxType != auxUInt8 {
		v.Fatalf("op %s doesn't have a uint8 aux field", v.Op)
	}
	return uint8(v.AuxInt)
}

func (v *Value) AuxInt16() int16 {
	if opcodeTable[v.Op].auxType != auxInt16 {
		v.Fatalf("op %s doesn't have an int16 aux field", v.Op)
@@ -190,6 +197,8 @@ func (v *Value) auxString() string {
		return fmt.Sprintf(" [%d]", v.AuxInt32())
	case auxInt64, auxInt128:
		return fmt.Sprintf(" [%d]", v.AuxInt)
	case auxUInt8:
		return fmt.Sprintf(" [%d]", v.AuxUInt8())
	case auxARM64BitField:
		lsb := v.AuxArm64BitField().getARM64BFlsb()
		width := v.AuxArm64BitField().getARM64BFwidth()
@@ -202,6 +211,7 @@ func (v *Value) auxString() string {
		if v.Aux != nil {
			return fmt.Sprintf(" {%v}", v.Aux)
		}
		return ""
	case auxSymOff, auxCallOff, auxTypSize, auxNameOffsetInt8:
		s := ""
		if v.Aux != nil {
@@ -223,8 +233,12 @@ func (v *Value) auxString() string {
		return fmt.Sprintf(" {%v}", v.Aux)
	case auxFlagConstant:
		return fmt.Sprintf("[%s]", flagConstant(v.AuxInt))
	case auxNone:
		return ""
	default:
		// If you see this, add a case above instead.
		return fmt.Sprintf("[auxtype=%d AuxInt=%d Aux=%v]", opcodeTable[v.Op].auxType, v.AuxInt, v.Aux)
	}
	return ""
}

// If/when midstack inlining is enabled (-l=4), the compiler gets both larger and slower.
@@ -0,0 +1,73 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package test

import (
	"encoding/binary"
	"testing"
)

var gv = [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8}

//go:noinline
func readGlobalUnaligned() uint64 {
	return binary.LittleEndian.Uint64(gv[1:])
}

func TestUnalignedGlobal(t *testing.T) {
	// Note: this is a test not so much of the result of the read, but of
	// the correct compilation of that read. On s390x unaligned global
	// accesses fail to compile.
	if got, want := readGlobalUnaligned(), uint64(0x0807060504030201); got != want {
		t.Errorf("read global %x, want %x", got, want)
	}
}

func TestSpillOfExtendedEndianLoads(t *testing.T) {
	b := []byte{0xaa, 0xbb, 0xcc, 0xdd}

	var testCases = []struct {
		fn   func([]byte) uint64
		want uint64
	}{
		{readUint16le, 0xbbaa},
		{readUint16be, 0xaabb},
		{readUint32le, 0xddccbbaa},
		{readUint32be, 0xaabbccdd},
	}
	for _, test := range testCases {
		if got := test.fn(b); got != test.want {
			t.Errorf("got %x, want %x", got, test.want)
		}
	}
}

func readUint16le(b []byte) uint64 {
	y := uint64(binary.LittleEndian.Uint16(b))
	nop() // force spill
	return y
}

func readUint16be(b []byte) uint64 {
	y := uint64(binary.BigEndian.Uint16(b))
	nop() // force spill
	return y
}

func readUint32le(b []byte) uint64 {
	y := uint64(binary.LittleEndian.Uint32(b))
	nop() // force spill
	return y
}

func readUint32be(b []byte) uint64 {
	y := uint64(binary.BigEndian.Uint32(b))
	nop() // force spill
	return y
}

//go:noinline
func nop() {
}