diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go index f065bb4dd4..b5847d48b9 100644 --- a/src/cmd/compile/internal/amd64/ggen.go +++ b/src/cmd/compile/internal/amd64/ggen.go @@ -58,7 +58,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. const ( r13 = 1 << iota // if R13 is already zeroed. x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero. - rax // if RAX is already zeroed. ) if cnt == 0 { @@ -117,18 +116,33 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) } else { - // Note: here we have to use RAX since it is an implicit input - // for the REPSTOSQ below. This is going to be problematic when - // regabi is in effect; this will be fixed in a forthcoming CL. - if *state&rax == 0 { - p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) - *state |= rax - } + // When the register ABI is in effect, at this point in the + // prolog we may have live values in all of RAX,RDI,RCX. Save + // them off to registers before the REPSTOSQ below, then + // restore. Note that R12 and R13 are always available as + // scratch regs; here we also use R15 (this is safe to do + // since there won't be any globals accessed in the prolog). + // See rewriteToUseGot() in obj6.go for more on r15 use. + // Save rax/rdi/rcx + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0) + + // Set up the REPSTOSQ and kick it off. + p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0) p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) + + // Restore rax/rdi/rcx + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0) + + // Record the fact that r13 is no longer zero. + *state &= ^uint32(r13) } return p diff --git a/src/cmd/compile/internal/test/zerorange_test.go b/src/cmd/compile/internal/test/zerorange_test.go index cb1a6e04e4..ec87136157 100644 --- a/src/cmd/compile/internal/test/zerorange_test.go +++ b/src/cmd/compile/internal/test/zerorange_test.go @@ -4,7 +4,9 @@ package test -import "testing" +import ( + "testing" +) var glob = 3 var globp *int64 @@ -94,3 +96,90 @@ func testZeroRange136(t *testing.T) (r, s, t2, u, v, w, x, y, r1, s1, t1, u1, v1 globp = &z1 return } + +type S struct { + x [2]uint64 + p *uint64 + y [2]uint64 + q uint64 +} + +type M struct { + x [8]uint64 + p *uint64 + y [8]uint64 + q uint64 +} + +type L struct { + x [4096]uint64 + p *uint64 + y [4096]uint64 + q uint64 +} + +//go:noinline +func triggerZerorangeLarge(f, g, h uint64) (rv0 uint64) { + ll := L{p: &f} + da := f + rv0 = f + g + h + defer func(dl L, i uint64) { + rv0 += dl.q + i + }(ll, da) + return rv0 +} + +//go:noinline +func triggerZerorangeMedium(f, g, h uint64) (rv0 uint64) { + ll := M{p: &f} + rv0 = f + g + h + defer func(dm M, i uint64) { + rv0 += dm.q + i + }(ll, f) + return rv0 +} + +//go:noinline +func triggerZerorangeSmall(f, g, h uint64) (rv0 uint64) { + ll := S{p: &f} + rv0 = f + g + h + defer func(ds S, i uint64) { + rv0 += ds.q + i + }(ll, f) + return rv0 +} + +// This test was created as a follow up to issue #45372, to help +// improve coverage of the compiler's arch-specific "zerorange" +// function, which is invoked to zero out ambiguously live portions of +// the stack frame in certain specific circumstances. +// +// In the current compiler implementation, for zerorange to be +// invoked, we need to have an ambiguously live variable that needs +// zeroing. One way to trigger this is to have a function with an +// open-coded defer, where the opendefer function has an argument that +// contains a pointer (this is what's used below). +// +// At the moment this test doesn't do any specific checking for +// code sequence, or verification that things were properly set to zero, +// this seems as though it would be too tricky and would result +// in a "brittle" test. +// +// The small/medium/large scenarios below are inspired by the amd64 +// implementation of zerorange, which generates different code +// depending on the size of the thing that needs to be zeroed out +// (I've verified at the time of the writing of this test that it +// exercises the various cases). +// +func TestZerorange45372(t *testing.T) { + if r := triggerZerorangeLarge(101, 303, 505); r != 1010 { + t.Errorf("large: wanted %d got %d", 1010, r) + } + if r := triggerZerorangeMedium(101, 303, 505); r != 1010 { + t.Errorf("medium: wanted %d got %d", 1010, r) + } + if r := triggerZerorangeSmall(101, 303, 505); r != 1010 { + t.Errorf("small: wanted %d got %d", 1010, r) + } + +}