mirror of https://github.com/golang/go.git
cmd/compile: avoid write barrier in append fast path
When we are writing the result of an append back to the same slice, we don’t need a write barrier on the fast path. This re-implements an optimization that was present in the old backend. Updates #14921 Fixes #14969 Sample code: var x []byte func p() { x = append(x, 1, 2, 3) } Before: "".p t=1 size=224 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 199 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x(SB), DX 0x0025 00037 (append.go:19) MOVQ "".x+8(SB), BX 0x002c 00044 (append.go:19) MOVQ BX, "".autotmp_0+64(SP) 0x0031 00049 (append.go:22) LEAQ 3(BX), BP 0x0035 00053 (append.go:22) CMPQ BP, CX 0x0038 00056 (append.go:22) JGT $0, 131 0x003a 00058 (append.go:22) MOVB $1, (DX)(BX*1) 0x003e 00062 (append.go:22) MOVB $2, 1(DX)(BX*1) 0x0043 00067 (append.go:22) MOVB $3, 2(DX)(BX*1) 0x0048 00072 (append.go:22) MOVQ BP, "".x+8(SB) 0x004f 00079 (append.go:22) MOVQ CX, "".x+16(SB) 0x0056 00086 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x005c 00092 (append.go:22) TESTB AL, AL 0x005e 00094 (append.go:22) JNE $0, 108 0x0060 00096 (append.go:22) MOVQ DX, "".x(SB) 0x0067 00103 (append.go:23) ADDQ $72, SP 0x006b 00107 (append.go:23) RET 0x006c 00108 (append.go:22) LEAQ "".x(SB), CX 0x0073 00115 (append.go:22) MOVQ CX, (SP) 0x0077 00119 (append.go:22) MOVQ DX, 8(SP) 0x007c 00124 (append.go:22) PCDATA $0, $0 0x007c 00124 (append.go:22) CALL runtime.writebarrierptr(SB) 0x0081 00129 (append.go:23) JMP 103 0x0083 00131 (append.go:22) LEAQ type.[]uint8(SB), AX 0x008a 00138 (append.go:22) MOVQ AX, (SP) 0x008e 00142 (append.go:22) MOVQ DX, 8(SP) 0x0093 00147 (append.go:22) MOVQ BX, 16(SP) 0x0098 00152 (append.go:22) MOVQ CX, 24(SP) 0x009d 00157 (append.go:22) MOVQ BP, 32(SP) 0x00a2 00162 (append.go:22) PCDATA $0, $0 0x00a2 00162 (append.go:22) CALL runtime.growslice(SB) 0x00a7 00167 (append.go:22) MOVQ 40(SP), DX 0x00ac 00172 (append.go:22) MOVQ 48(SP), AX 0x00b1 00177 (append.go:22) MOVQ 56(SP), CX 0x00b6 00182 (append.go:22) ADDQ $3, AX 0x00ba 00186 (append.go:19) MOVQ "".autotmp_0+64(SP), BX 0x00bf 00191 (append.go:22) MOVQ AX, BP 0x00c2 00194 (append.go:22) JMP 58 0x00c7 00199 (append.go:22) NOP 0x00c7 00199 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00cc 00204 (append.go:21) JMP 0 After: "".p t=1 size=208 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 191 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x+8(SB), DX 0x0025 00037 (append.go:19) MOVQ DX, "".autotmp_0+64(SP) 0x002a 00042 (append.go:19) MOVQ "".x(SB), BX 0x0031 00049 (append.go:22) LEAQ 3(DX), BP 0x0035 00053 (append.go:22) MOVQ BP, "".x+8(SB) 0x003c 00060 (append.go:22) CMPQ BP, CX 0x003f 00063 (append.go:22) JGT $0, 84 0x0041 00065 (append.go:22) MOVB $1, (BX)(DX*1) 0x0045 00069 (append.go:22) MOVB $2, 1(BX)(DX*1) 0x004a 00074 (append.go:22) MOVB $3, 2(BX)(DX*1) 0x004f 00079 (append.go:23) ADDQ $72, SP 0x0053 00083 (append.go:23) RET 0x0054 00084 (append.go:22) LEAQ type.[]uint8(SB), AX 0x005b 00091 (append.go:22) MOVQ AX, (SP) 0x005f 00095 (append.go:22) MOVQ BX, 8(SP) 0x0064 00100 (append.go:22) MOVQ DX, 16(SP) 0x0069 00105 (append.go:22) MOVQ CX, 24(SP) 0x006e 00110 (append.go:22) MOVQ BP, 32(SP) 0x0073 00115 (append.go:22) PCDATA $0, $0 0x0073 00115 (append.go:22) CALL runtime.growslice(SB) 0x0078 00120 (append.go:22) MOVQ 40(SP), CX 0x007d 00125 (append.go:22) MOVQ 56(SP), AX 0x0082 00130 (append.go:22) MOVQ AX, "".x+16(SB) 0x0089 00137 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x008f 00143 (append.go:22) TESTB AL, AL 0x0091 00145 (append.go:22) JNE $0, 168 0x0093 00147 (append.go:22) MOVQ CX, "".x(SB) 0x009a 00154 (append.go:22) MOVQ "".x(SB), BX 0x00a1 00161 (append.go:19) MOVQ "".autotmp_0+64(SP), DX 0x00a6 00166 (append.go:22) JMP 65 0x00a8 00168 (append.go:22) LEAQ "".x(SB), DX 0x00af 00175 (append.go:22) MOVQ DX, (SP) 0x00b3 00179 (append.go:22) MOVQ CX, 8(SP) 0x00b8 00184 (append.go:22) PCDATA $0, $0 0x00b8 00184 (append.go:22) CALL runtime.writebarrierptr(SB) 0x00bd 00189 (append.go:22) JMP 154 0x00bf 00191 (append.go:22) NOP 0x00bf 00191 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00c4 00196 (append.go:21) JMP 0 Change-Id: I77a41ad3a22557a4bb4654de7d6d24a029efe34a Reviewed-on: https://go-review.googlesource.com/21813 Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
32efa16c3d
commit
a4650a2111
|
|
@ -683,14 +683,27 @@ func (s *state) stmt(n *Node) {
|
|||
|
||||
// Evaluate RHS.
|
||||
rhs := n.Right
|
||||
if rhs != nil && (rhs.Op == OSTRUCTLIT || rhs.Op == OARRAYLIT) {
|
||||
// All literals with nonzero fields have already been
|
||||
// rewritten during walk. Any that remain are just T{}
|
||||
// or equivalents. Use the zero value.
|
||||
if !iszero(rhs) {
|
||||
Fatalf("literal with nonzero value in SSA: %v", rhs)
|
||||
if rhs != nil {
|
||||
switch rhs.Op {
|
||||
case OSTRUCTLIT, OARRAYLIT:
|
||||
// All literals with nonzero fields have already been
|
||||
// rewritten during walk. Any that remain are just T{}
|
||||
// or equivalents. Use the zero value.
|
||||
if !iszero(rhs) {
|
||||
Fatalf("literal with nonzero value in SSA: %v", rhs)
|
||||
}
|
||||
rhs = nil
|
||||
case OAPPEND:
|
||||
// If we're writing the result of an append back to the same slice,
|
||||
// handle it specially to avoid write barriers on the fast (non-growth) path.
|
||||
// If the slice can be SSA'd, it'll be on the stack,
|
||||
// so there will be no write barriers,
|
||||
// so there's no need to attempt to prevent them.
|
||||
if samesafeexpr(n.Left, rhs.List.First()) && !s.canSSA(n.Left) {
|
||||
s.append(rhs, true)
|
||||
return
|
||||
}
|
||||
}
|
||||
rhs = nil
|
||||
}
|
||||
var r *ssa.Value
|
||||
needwb := n.Op == OASWB && rhs != nil
|
||||
|
|
@ -709,11 +722,11 @@ func (s *state) stmt(n *Node) {
|
|||
}
|
||||
}
|
||||
if rhs != nil && rhs.Op == OAPPEND {
|
||||
// Yuck! The frontend gets rid of the write barrier, but we need it!
|
||||
// At least, we need it in the case where growslice is called.
|
||||
// TODO: Do the write barrier on just the growslice branch.
|
||||
// The frontend gets rid of the write barrier to enable the special OAPPEND
|
||||
// handling above, but since this is not a special case, we need it.
|
||||
// TODO: just add a ptr graying to the end of growslice?
|
||||
// TODO: check whether we need to do this for ODOTTYPE and ORECV also.
|
||||
// TODO: check whether we need to provide special handling and a write barrier
|
||||
// for ODOTTYPE and ORECV also.
|
||||
// They get similar wb-removal treatment in walk.go:OAS.
|
||||
needwb = true
|
||||
}
|
||||
|
|
@ -2079,7 +2092,7 @@ func (s *state) expr(n *Node) *ssa.Value {
|
|||
return s.newValue1(ssa.OpGetG, n.Type, s.mem())
|
||||
|
||||
case OAPPEND:
|
||||
return s.exprAppend(n)
|
||||
return s.append(n, false)
|
||||
|
||||
default:
|
||||
s.Unimplementedf("unhandled expr %s", opnames[n.Op])
|
||||
|
|
@ -2087,25 +2100,57 @@ func (s *state) expr(n *Node) *ssa.Value {
|
|||
}
|
||||
}
|
||||
|
||||
// exprAppend converts an OAPPEND node n to an ssa.Value, adds it to s, and returns the Value.
|
||||
func (s *state) exprAppend(n *Node) *ssa.Value {
|
||||
// append(s, e1, e2, e3). Compile like:
|
||||
// append converts an OAPPEND node to SSA.
|
||||
// If inplace is false, it converts the OAPPEND expression n to an ssa.Value,
|
||||
// adds it to s, and returns the Value.
|
||||
// If inplace is true, it writes the result of the OAPPEND expression n
|
||||
// back to the slice being appended to, and returns nil.
|
||||
// inplace MUST be set to false if the slice can be SSA'd.
|
||||
func (s *state) append(n *Node, inplace bool) *ssa.Value {
|
||||
// If inplace is false, process as expression "append(s, e1, e2, e3)":
|
||||
//
|
||||
// ptr, len, cap := s
|
||||
// newlen := len + 3
|
||||
// if newlen > s.cap {
|
||||
// if newlen > cap {
|
||||
// ptr, len, cap = growslice(s, newlen)
|
||||
// newlen = len + 3 // recalculate to avoid a spill
|
||||
// }
|
||||
// // with write barriers, if needed:
|
||||
// *(ptr+len) = e1
|
||||
// *(ptr+len+1) = e2
|
||||
// *(ptr+len+2) = e3
|
||||
// return makeslice(ptr, newlen, cap)
|
||||
//
|
||||
//
|
||||
// If inplace is true, process as statement "s = append(s, e1, e2, e3)":
|
||||
//
|
||||
// a := &s
|
||||
// ptr, len, cap := s
|
||||
// newlen := len + 3
|
||||
// *a.len = newlen // store newlen immediately to avoid a spill
|
||||
// if newlen > cap {
|
||||
// newptr, _, newcap = growslice(ptr, len, cap, newlen)
|
||||
// *a.cap = newcap // write before ptr to avoid a spill
|
||||
// *a.ptr = newptr // with write barrier
|
||||
// }
|
||||
// // with write barriers, if needed:
|
||||
// *(ptr+len) = e1
|
||||
// *(ptr+len+1) = e2
|
||||
// *(ptr+len+2) = e3
|
||||
// makeslice(ptr, newlen, cap)
|
||||
|
||||
et := n.Type.Elem()
|
||||
pt := Ptrto(et)
|
||||
|
||||
// Evaluate slice
|
||||
slice := s.expr(n.List.First())
|
||||
sn := n.List.First() // the slice node is the first in the list
|
||||
|
||||
var slice, addr *ssa.Value
|
||||
if inplace {
|
||||
addr = s.addr(sn, false)
|
||||
slice = s.newValue2(ssa.OpLoad, n.Type, addr, s.mem())
|
||||
} else {
|
||||
slice = s.expr(sn)
|
||||
}
|
||||
|
||||
// Allocate new blocks
|
||||
grow := s.f.NewBlock(ssa.BlockPlain)
|
||||
|
|
@ -2117,10 +2162,20 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
|
|||
l := s.newValue1(ssa.OpSliceLen, Types[TINT], slice)
|
||||
c := s.newValue1(ssa.OpSliceCap, Types[TINT], slice)
|
||||
nl := s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], l, s.constInt(Types[TINT], nargs))
|
||||
|
||||
if inplace {
|
||||
lenaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_nel), addr)
|
||||
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, lenaddr, nl, s.mem())
|
||||
}
|
||||
|
||||
cmp := s.newValue2(s.ssaOp(OGT, Types[TINT]), Types[TBOOL], nl, c)
|
||||
s.vars[&ptrVar] = p
|
||||
s.vars[&newlenVar] = nl
|
||||
s.vars[&capVar] = c
|
||||
|
||||
if !inplace {
|
||||
s.vars[&newlenVar] = nl
|
||||
s.vars[&capVar] = c
|
||||
}
|
||||
|
||||
b := s.endBlock()
|
||||
b.Kind = ssa.BlockIf
|
||||
b.Likely = ssa.BranchUnlikely
|
||||
|
|
@ -2134,9 +2189,18 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
|
|||
|
||||
r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl)
|
||||
|
||||
s.vars[&ptrVar] = r[0]
|
||||
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
|
||||
s.vars[&capVar] = r[2]
|
||||
if inplace {
|
||||
capaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_cap), addr)
|
||||
s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, capaddr, r[2], s.mem())
|
||||
s.insertWBstore(pt, addr, r[0], n.Lineno, 0)
|
||||
// load the value we just stored to avoid having to spill it
|
||||
s.vars[&ptrVar] = s.newValue2(ssa.OpLoad, pt, addr, s.mem())
|
||||
} else {
|
||||
s.vars[&ptrVar] = r[0]
|
||||
s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs))
|
||||
s.vars[&capVar] = r[2]
|
||||
}
|
||||
|
||||
b = s.endBlock()
|
||||
b.AddEdgeTo(assign)
|
||||
|
||||
|
|
@ -2156,9 +2220,11 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
|
|||
}
|
||||
}
|
||||
|
||||
p = s.variable(&ptrVar, pt) // generates phi for ptr
|
||||
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
|
||||
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
|
||||
p = s.variable(&ptrVar, pt) // generates phi for ptr
|
||||
if !inplace {
|
||||
nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl
|
||||
c = s.variable(&capVar, Types[TINT]) // generates phi for cap
|
||||
}
|
||||
p2 := s.newValue2(ssa.OpPtrIndex, pt, p, l)
|
||||
// TODO: just one write barrier call for all of these writes?
|
||||
// TODO: maybe just one writeBarrier.enabled check?
|
||||
|
|
@ -2179,10 +2245,13 @@ func (s *state) exprAppend(n *Node) *ssa.Value {
|
|||
}
|
||||
}
|
||||
|
||||
// make result
|
||||
delete(s.vars, &ptrVar)
|
||||
if inplace {
|
||||
return nil
|
||||
}
|
||||
delete(s.vars, &newlenVar)
|
||||
delete(s.vars, &capVar)
|
||||
// make result
|
||||
return s.newValue3(ssa.OpSliceMake, n.Type, p, nl, c)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue