diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 6587c40ebc..54704ec60e 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -33,6 +33,7 @@ type Config struct {
 	ctxt            *obj.Link // Generic arch information
 	optimize        bool      // Do optimization
 	noDuffDevice    bool      // Don't use Duff's device
+	useSSE          bool      // Use SSE for non-float operations
 	nacl            bool      // GOOS=nacl
 	use387          bool      // GO386=387
 	NeedsFpScratch  bool      // No direct move between GP and FP register sets
@@ -264,11 +265,13 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config
 	c.ctxt = ctxt
 	c.optimize = optimize
 	c.nacl = objabi.GOOS == "nacl"
+	c.useSSE = true
 
-	// Don't use Duff's device on Plan 9 AMD64, because floating
-	// point operations are not allowed in note handler.
+	// Don't use Duff's device nor SSE on Plan 9 AMD64, because
+	// floating point operations are not allowed in note handler.
 	if objabi.GOOS == "plan9" && arch == "amd64" {
 		c.noDuffDevice = true
+		c.useSSE = false
 	}
 
 	if c.nacl {
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index fff894c571..e648e0856b 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -386,29 +386,48 @@
 		(MOVLstoreconst [makeValAndOff(0,3)] destptr
 			(MOVLstoreconst [0] destptr mem))
 
-(Zero [s] destptr mem) && s > 8 && s < 16 ->
+// Strip off any fractional word zeroing.
+(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE ->
+	(Zero [s-s%8] (OffPtr destptr [s%8])
+		(MOVQstoreconst [0] destptr mem))
+
+// Zero small numbers of words directly.
+(Zero [16] destptr mem) && !config.useSSE ->
+	(MOVQstoreconst [makeValAndOff(0,8)] destptr
+		(MOVQstoreconst [0] destptr mem))
+(Zero [24] destptr mem) && !config.useSSE ->
+	(MOVQstoreconst [makeValAndOff(0,16)] destptr
+		(MOVQstoreconst [makeValAndOff(0,8)] destptr
+			(MOVQstoreconst [0] destptr mem)))
+(Zero [32] destptr mem) && !config.useSSE ->
+	(MOVQstoreconst [makeValAndOff(0,24)] destptr
+		(MOVQstoreconst [makeValAndOff(0,16)] destptr
+			(MOVQstoreconst [makeValAndOff(0,8)] destptr
+				(MOVQstoreconst [0] destptr mem))))
+
+(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE ->
 	(MOVQstoreconst [makeValAndOff(0,s-8)] destptr
 		(MOVQstoreconst [0] destptr mem))
 
 // Adjust zeros to be a multiple of 16 bytes.
-(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 ->
+(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE ->
 	(Zero [s-s%16] (OffPtr destptr [s%16])
 		(MOVOstore destptr (MOVOconst [0]) mem))
 
-(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 ->
+(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE ->
 	(Zero [s-s%16] (OffPtr destptr [s%16])
 		(MOVQstoreconst [0] destptr mem))
 
-(Zero [16] destptr mem) ->
+(Zero [16] destptr mem) && config.useSSE ->
 	(MOVOstore destptr (MOVOconst [0]) mem)
-(Zero [32] destptr mem) ->
+(Zero [32] destptr mem) && config.useSSE ->
 	(MOVOstore (OffPtr destptr [16]) (MOVOconst [0])
 		(MOVOstore destptr (MOVOconst [0]) mem))
-(Zero [48] destptr mem) ->
+(Zero [48] destptr mem) && config.useSSE ->
 	(MOVOstore (OffPtr destptr [32]) (MOVOconst [0])
 		(MOVOstore (OffPtr destptr [16]) (MOVOconst [0])
 			(MOVOstore destptr (MOVOconst [0]) mem)))
-(Zero [64] destptr mem) ->
+(Zero [64] destptr mem) && config.useSSE ->
 	(MOVOstore (OffPtr destptr [48]) (MOVOconst [0])
 		(MOVOstore (OffPtr destptr [32]) (MOVOconst [0])
 			(MOVOstore (OffPtr destptr [16]) (MOVOconst [0])
@@ -421,7 +440,7 @@
 
 // Large zeroing uses REP STOSQ.
 (Zero [s] destptr mem)
-	&& (s > 1024 || (config.noDuffDevice && s > 64))
+	&& (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
 	&& s%8 == 0 ->
 	(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
 
@@ -2205,6 +2224,7 @@
   && clobber(x)
   -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
+  && config.useSSE
   && x.Uses == 1
   && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
   && ValAndOff(c).Val() == 0
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 302812b170..3762931178 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -842,7 +842,7 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpXor8:
 		return rewriteValueAMD64_OpXor8_0(v)
 	case OpZero:
-		return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v)
+		return rewriteValueAMD64_OpZero_0(v) || rewriteValueAMD64_OpZero_10(v) || rewriteValueAMD64_OpZero_20(v)
 	case OpZeroExt16to32:
 		return rewriteValueAMD64_OpZeroExt16to32_0(v)
 	case OpZeroExt16to64:
@@ -9089,6 +9089,8 @@ func rewriteValueAMD64_OpAMD64MOVQstore_0(v *Value) bool {
 func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	config := b.Func.Config
+	_ = config
 	// match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem)
 	// cond: ValAndOff(sc).canAdd(off)
 	// result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
@@ -9218,7 +9220,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
 		return true
 	}
 	// match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
-	// cond: x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
+	// cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
 	// result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
 	for {
 		c := v.AuxInt
@@ -9238,7 +9240,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if !(x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
+		if !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVOstore)
@@ -42862,6 +42864,8 @@ func rewriteValueAMD64_OpXor8_0(v *Value) bool {
 func rewriteValueAMD64_OpZero_0(v *Value) bool {
 	b := v.Block
 	_ = b
+	config := b.Func.Config
+	_ = config
 	// match: (Zero [0] _ mem)
 	// cond:
 	// result: mem
@@ -43021,14 +43025,126 @@ func rewriteValueAMD64_OpZero_0(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: s > 8 && s < 16
+	// cond: s%8 != 0 && s > 8 && !config.useSSE
+	// result: (Zero [s-s%8] (OffPtr destptr [s%8]) (MOVQstoreconst [0] destptr mem))
+	for {
+		s := v.AuxInt
+		_ = v.Args[1]
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		if !(s%8 != 0 && s > 8 && !config.useSSE) {
+			break
+		}
+		v.reset(OpZero)
+		v.AuxInt = s - s%8
+		v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
+		v0.AuxInt = s % 8
+		v0.AddArg(destptr)
+		v.AddArg(v0)
+		v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v1.AuxInt = 0
+		v1.AddArg(destptr)
+		v1.AddArg(mem)
+		v.AddArg(v1)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpZero_10(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	// match: (Zero [16] destptr mem)
+	// cond: !config.useSSE
+	// result: (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))
+	for {
+		if v.AuxInt != 16 {
+			break
+		}
+		_ = v.Args[1]
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		if !(!config.useSSE) {
+			break
+		}
+		v.reset(OpAMD64MOVQstoreconst)
+		v.AuxInt = makeValAndOff(0, 8)
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v0.AuxInt = 0
+		v0.AddArg(destptr)
+		v0.AddArg(mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [24] destptr mem)
+	// cond: !config.useSSE
+	// result: (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem)))
+	for {
+		if v.AuxInt != 24 {
+			break
+		}
+		_ = v.Args[1]
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		if !(!config.useSSE) {
+			break
+		}
+		v.reset(OpAMD64MOVQstoreconst)
+		v.AuxInt = makeValAndOff(0, 16)
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v0.AuxInt = makeValAndOff(0, 8)
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v1.AuxInt = 0
+		v1.AddArg(destptr)
+		v1.AddArg(mem)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [32] destptr mem)
+	// cond: !config.useSSE
+	// result: (MOVQstoreconst [makeValAndOff(0,24)] destptr (MOVQstoreconst [makeValAndOff(0,16)] destptr (MOVQstoreconst [makeValAndOff(0,8)] destptr (MOVQstoreconst [0] destptr mem))))
+	for {
+		if v.AuxInt != 32 {
+			break
+		}
+		_ = v.Args[1]
+		destptr := v.Args[0]
+		mem := v.Args[1]
+		if !(!config.useSSE) {
+			break
+		}
+		v.reset(OpAMD64MOVQstoreconst)
+		v.AuxInt = makeValAndOff(0, 24)
+		v.AddArg(destptr)
+		v0 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v0.AuxInt = makeValAndOff(0, 16)
+		v0.AddArg(destptr)
+		v1 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v1.AuxInt = makeValAndOff(0, 8)
+		v1.AddArg(destptr)
+		v2 := b.NewValue0(v.Pos, OpAMD64MOVQstoreconst, types.TypeMem)
+		v2.AuxInt = 0
+		v2.AddArg(destptr)
+		v2.AddArg(mem)
+		v1.AddArg(v2)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (Zero [s] destptr mem)
+	// cond: s > 8 && s < 16 && config.useSSE
 	// result: (MOVQstoreconst [makeValAndOff(0,s-8)] destptr (MOVQstoreconst [0] destptr mem))
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s > 8 && s < 16) {
+		if !(s > 8 && s < 16 && config.useSSE) {
 			break
 		}
 		v.reset(OpAMD64MOVQstoreconst)
@@ -43041,24 +43157,15 @@ func rewriteValueAMD64_OpZero_0(v *Value) bool {
 		v.AddArg(v0)
 		return true
 	}
-	return false
-}
-func rewriteValueAMD64_OpZero_10(v *Value) bool {
-	b := v.Block
-	_ = b
-	config := b.Func.Config
-	_ = config
-	typ := &b.Func.Config.Types
-	_ = typ
 	// match: (Zero [s] destptr mem)
-	// cond: s%16 != 0 && s > 16 && s%16 > 8
+	// cond: s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE
 	// result: (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVOstore destptr (MOVOconst [0]) mem))
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%16 != 0 && s > 16 && s%16 > 8) {
+		if !(s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE) {
 			break
 		}
 		v.reset(OpZero)
@@ -43077,14 +43184,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: s%16 != 0 && s > 16 && s%16 <= 8
+	// cond: s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE
 	// result: (Zero [s-s%16] (OffPtr destptr [s%16]) (MOVQstoreconst [0] destptr mem))
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
-		if !(s%16 != 0 && s > 16 && s%16 <= 8) {
+		if !(s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE) {
 			break
 		}
 		v.reset(OpZero)
@@ -43101,7 +43208,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		return true
 	}
 	// match: (Zero [16] destptr mem)
-	// cond:
+	// cond: config.useSSE
 	// result: (MOVOstore destptr (MOVOconst [0]) mem)
 	for {
 		if v.AuxInt != 16 {
@@ -43110,6 +43217,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
+		if !(config.useSSE) {
+			break
+		}
 		v.reset(OpAMD64MOVOstore)
 		v.AddArg(destptr)
 		v0 := b.NewValue0(v.Pos, OpAMD64MOVOconst, types.TypeInt128)
@@ -43119,7 +43229,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		v.AddArg(v0)
 		v.AddArg(mem)
 		return true
 	}
 	// match: (Zero [32] destptr mem)
-	// cond:
+	// cond: config.useSSE
 	// result: (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))
 	for {
 		if v.AuxInt != 32 {
@@ -43128,6 +43238,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
+		if !(config.useSSE) {
+			break
+		}
 		v.reset(OpAMD64MOVOstore)
 		v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
 		v0.AuxInt = 16
@@ -43146,7 +43259,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		return true
 	}
 	// match: (Zero [48] destptr mem)
-	// cond:
+	// cond: config.useSSE
 	// result: (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem)))
 	for {
 		if v.AuxInt != 48 {
@@ -43155,6 +43268,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
+		if !(config.useSSE) {
+			break
+		}
 		v.reset(OpAMD64MOVOstore)
 		v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
 		v0.AuxInt = 32
@@ -43182,7 +43298,7 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		return true
 	}
 	// match: (Zero [64] destptr mem)
-	// cond:
+	// cond: config.useSSE
 	// result: (MOVOstore (OffPtr destptr [48]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [32]) (MOVOconst [0]) (MOVOstore (OffPtr destptr [16]) (MOVOconst [0]) (MOVOstore destptr (MOVOconst [0]) mem))))
 	for {
 		if v.AuxInt != 64 {
@@ -43191,6 +43307,9 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
+		if !(config.useSSE) {
+			break
+		}
 		v.reset(OpAMD64MOVOstore)
 		v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
 		v0.AuxInt = 48
@@ -43226,6 +43345,15 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		v.AddArg(v2)
 		return true
 	}
+	return false
+}
+func rewriteValueAMD64_OpZero_20(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (Zero [s] destptr mem)
 	// cond: s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice
 	// result: (DUFFZERO [s] destptr (MOVOconst [0]) mem)
 	for {
@@ -43247,14 +43375,14 @@ func rewriteValueAMD64_OpZero_10(v *Value) bool {
 		return true
 	}
 	// match: (Zero [s] destptr mem)
-	// cond: (s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0
+	// cond: (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0
 	// result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
 	for {
 		s := v.AuxInt
 		_ = v.Args[1]
 		destptr := v.Args[0]
 		mem := v.Args[1]
-		if !((s > 1024 || (config.noDuffDevice && s > 64)) && s%8 == 0) {
+		if !((s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32)) && s%8 == 0) {
 			break
 		}
 		v.reset(OpAMD64REPSTOSQ)
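
Reading aid (not part of the patch): the rules above pick one of four zeroing strategies based on the size being zeroed and the new useSSE flag. The Go sketch below restates that selection logic for readers skimming the rules; the names zeroStrategy and pickZeroStrategy are invented for illustration and do not exist in the compiler, and the small-size and fractional-word fixup rules are elided.

// Illustrative sketch only. Assumes the thresholds from the rules above:
// 8-byte MOVQ constant stores up to 32 bytes when SSE is off, 16-byte MOVO
// stores up to 64 bytes when SSE is on, Duff's device up to 1024 bytes,
// REP STOSQ otherwise.
package main

import "fmt"

type zeroStrategy string

// pickZeroStrategy mirrors the rule conditions in simplified form.
// On plan9/amd64 the config sets useSSE=false and noDuffDevice=true together.
func pickZeroStrategy(size int64, useSSE, noDuffDevice bool) zeroStrategy {
	switch {
	case !useSSE && size <= 32:
		return "chain of 8-byte MOVQstoreconst" // no SSE registers touched
	case !useSSE: // size > 32, size%8 == 0 assumed
		return "REPSTOSQ"
	case size <= 64:
		return "chain of 16-byte MOVOstore"
	case size <= 1024 && size%16 == 0 && !noDuffDevice:
		return "DUFFZERO"
	default:
		return "REPSTOSQ"
	}
}

func main() {
	fmt.Println(pickZeroStrategy(32, false, true)) // plan9/amd64: MOVQ stores
	fmt.Println(pickZeroStrategy(32, true, false)) // typical amd64: MOVO stores
}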