cmd/compiler,internal/runtime/atomic: optimize Cas{64,32} on loong64

In Loongson's new microstructure LA664 (Loongson-3A6000) and later, the atomic
compare-and-exchange instruction AMCAS[DB]{B,W,H,V} [1] is supported. Therefore,
the implementation of the atomic operation compare-and-swap can be selected according
to the CPUCFG flag LAMCAS: AMCASDB(full barrier) instruction is used on new
microstructures, and traditional LL-SC is used on LA464 (Loongson-3A5000) and older
microstructures. This can significantly improve the performance of Go programs on
new microstructures.

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A6000 @ 2500.00MHz
         |  bench.old   |  bench.new                           |
         |   sec/op     |   sec/op       vs base               |
Cas        46.84n ±  0%   22.82n ±  0%  -51.28% (p=0.000 n=20)
Cas-2      47.58n ±  0%   29.57n ±  0%  -37.85% (p=0.000 n=20)
Cas-4      43.27n ± 20%   25.31n ± 13%  -41.50% (p=0.000 n=20)
Cas64      46.85n ±  0%   22.82n ±  0%  -51.29% (p=0.000 n=20)
Cas64-2    47.43n ±  0%   29.53n ±  0%  -37.74% (p=0.002 n=20)
Cas64-4    43.18n ±  0%   25.28n ±  2%  -41.46% (p=0.000 n=20)
geomean    45.82n         25.74n        -43.82%

goos: linux
goarch: loong64
pkg: internal/runtime/atomic
cpu: Loongson-3A5000 @ 2500.00MHz
         |  bench.old  |  bench.new                         |
         |   sec/op    |   sec/op      vs base              |
Cas        50.05n ± 0%   51.26n ± 0%  +2.42% (p=0.000 n=20)
Cas-2      52.80n ± 0%   53.11n ± 0%  +0.59% (p=0.000 n=20)
Cas-4      55.97n ± 0%   57.31n ± 0%  +2.39% (p=0.000 n=20)
Cas64      50.05n ± 0%   51.26n ± 0%  +2.42% (p=0.000 n=20)
Cas64-2    52.68n ± 0%   53.11n ± 0%  +0.82% (p=0.000 n=20)
Cas64-4    55.96n ± 0%   57.26n ± 0%  +2.33% (p=0.000 n=20)
geomean    52.86n        53.83n       +1.82%

[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html

Change-Id: I9b777c63c124fb492f61c903f77061fa2b4e5322
Reviewed-on: https://go-review.googlesource.com/c/go/+/613396
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Qiqi Huang <huangqiqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Guoqi Chen 2024-09-20 11:06:18 +08:00 committed by abner chenc
parent ec7824b6bb
commit 5432cd96fd
15 changed files with 277 additions and 50 deletions

View File

@ -60,6 +60,7 @@ type symsStruct struct {
Zerobase *obj.LSym
ARM64HasATOMICS *obj.LSym
ARMHasVFPv4 *obj.LSym
Loong64HasLAMCAS *obj.LSym
Loong64HasLAM_BH *obj.LSym
Loong64HasLSX *obj.LSym
X86HasFMA *obj.LSym

View File

@ -746,52 +746,64 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpLOONG64LoweredAtomicCas32, ssa.OpLOONG64LoweredAtomicCas64:
// MOVV $0, Rout
// DBAR
// DBAR 0x14
// LL (Rarg0), Rtmp
// BNE Rtmp, Rarg1, 4(PC)
// MOVV Rarg2, Rout
// SC Rout, (Rarg0)
// BEQ Rout, -4(PC)
// DBAR
// DBAR 0x12
ll := loong64.ALLV
sc := loong64.ASCV
if v.Op == ssa.OpLOONG64LoweredAtomicCas32 {
ll = loong64.ALL
sc = loong64.ASC
}
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_REG
p.From.Reg = loong64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
s.Prog(loong64.ADBAR)
p1 := s.Prog(ll)
p1.From.Type = obj.TYPE_MEM
p1.From.Reg = v.Args[0].Reg()
p1.To.Type = obj.TYPE_REG
p1.To.Reg = loong64.REGTMP
p2 := s.Prog(loong64.ABNE)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = v.Args[1].Reg()
p2.Reg = loong64.REGTMP
p2.To.Type = obj.TYPE_BRANCH
p3 := s.Prog(loong64.AMOVV)
p1 := s.Prog(loong64.ADBAR)
p1.From.Type = obj.TYPE_CONST
p1.From.Offset = 0x14
p2 := s.Prog(ll)
p2.From.Type = obj.TYPE_MEM
p2.From.Reg = v.Args[0].Reg()
p2.To.Type = obj.TYPE_REG
p2.To.Reg = loong64.REGTMP
p3 := s.Prog(loong64.ABNE)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = v.Args[2].Reg()
p3.To.Type = obj.TYPE_REG
p3.To.Reg = v.Reg0()
p4 := s.Prog(sc)
p3.From.Reg = v.Args[1].Reg()
p3.Reg = loong64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p4 := s.Prog(loong64.AMOVV)
p4.From.Type = obj.TYPE_REG
p4.From.Reg = v.Reg0()
p4.To.Type = obj.TYPE_MEM
p4.To.Reg = v.Args[0].Reg()
p5 := s.Prog(loong64.ABEQ)
p4.From.Reg = v.Args[2].Reg()
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
p5 := s.Prog(sc)
p5.From.Type = obj.TYPE_REG
p5.From.Reg = v.Reg0()
p5.To.Type = obj.TYPE_BRANCH
p5.To.SetTarget(p1)
p6 := s.Prog(loong64.ADBAR)
p2.To.SetTarget(p6)
p5.To.Type = obj.TYPE_MEM
p5.To.Reg = v.Args[0].Reg()
p6 := s.Prog(loong64.ABEQ)
p6.From.Type = obj.TYPE_REG
p6.From.Reg = v.Reg0()
p6.To.Type = obj.TYPE_BRANCH
p6.To.SetTarget(p2)
p7 := s.Prog(loong64.ADBAR)
p7.From.Type = obj.TYPE_CONST
p7.From.Offset = 0x12
p3.To.SetTarget(p7)
case ssa.OpLOONG64LoweredAtomicAnd32,
ssa.OpLOONG64LoweredAtomicOr32:
@ -815,6 +827,53 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg()
p.RegTo2 = v.Reg0()
case ssa.OpLOONG64LoweredAtomicCas64Variant, ssa.OpLOONG64LoweredAtomicCas32Variant:
// MOVV $0, Rout
// MOVV Rarg1, Rtmp
// AMCASDBx Rarg2, (Rarg0), Rtmp
// BNE Rarg1, Rtmp, 2(PC)
// MOVV $1, Rout
// NOP
amcasx := loong64.AAMCASDBV
if v.Op == ssa.OpLOONG64LoweredAtomicCas32Variant {
amcasx = loong64.AAMCASDBW
}
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_REG
p.From.Reg = loong64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
p1 := s.Prog(loong64.AMOVV)
p1.From.Type = obj.TYPE_REG
p1.From.Reg = v.Args[1].Reg()
p1.To.Type = obj.TYPE_REG
p1.To.Reg = loong64.REGTMP
p2 := s.Prog(amcasx)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = v.Args[2].Reg()
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = v.Args[0].Reg()
p2.RegTo2 = loong64.REGTMP
p3 := s.Prog(loong64.ABNE)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = v.Args[1].Reg()
p3.Reg = loong64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p4 := s.Prog(loong64.AMOVV)
p4.From.Type = obj.TYPE_CONST
p4.From.Offset = 0x1
p4.To.Type = obj.TYPE_REG
p4.To.Reg = v.Reg0()
p5 := s.Prog(obj.ANOP)
p3.To.SetTarget(p5)
case ssa.OpLOONG64LoweredNilCheck:
// Issue a load which will fault if arg is nil.
p := s.Prog(loong64.AMOVB)

View File

@ -453,8 +453,14 @@
(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
// Loong64's 32-bit atomic operation instructions ll.w and amcasw are both sign-extended,
// so the input parameters need to be sign-extended to 64 bits, otherwise the subsequent
// comparison operations may not produce the expected results.
//
(AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem)
(AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...)
(AtomicCompareAndSwap32Variant ptr old new mem) => (LoweredAtomicCas32Variant ptr (SignExt32to64 old) new mem)
(AtomicCompareAndSwap64Variant ...) => (LoweredAtomicCas64Variant ...)
// Atomic memory logical operations (old style).
//

View File

@ -479,17 +479,34 @@ func init() {
// } else {
// return (false, memory)
// }
// DBAR
// MOVV $0, Rout
// DBAR 0x14
// LL (Rarg0), Rtmp
// BNE Rtmp, Rarg1, 4(PC)
// MOVV Rarg2, Rout
// SC Rout, (Rarg0)
// BEQ Rout, -4(PC)
// DBAR
// DBAR 0x12
{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// atomic compare and swap variant.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
// if *arg0 == arg1 {
// *arg0 = arg2
// return (true, memory)
// } else {
// return (false, memory)
// }
// MOVV $0, Rout
// MOVV Rarg1, Rtmp
// AMCASDBx Rarg2, (Rarg0), Rtmp
// BNE Rarg1, Rtmp, 2(PC)
// MOVV $1, Rout
// NOP
{name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// Atomic 32 bit AND/OR.
// *arg0 &= (|=) arg1. arg2=mem. returns nil.
{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, asm: "AMANDDBW", resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},

View File

@ -1928,6 +1928,8 @@ const (
OpLOONG64LoweredAtomicAdd64
OpLOONG64LoweredAtomicCas32
OpLOONG64LoweredAtomicCas64
OpLOONG64LoweredAtomicCas64Variant
OpLOONG64LoweredAtomicCas32Variant
OpLOONG64LoweredAtomicAnd32
OpLOONG64LoweredAtomicOr32
OpLOONG64LoweredAtomicAnd32value
@ -25921,6 +25923,42 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredAtomicCas64Variant",
argLen: 4,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
{2, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
{0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB
},
outputs: []outputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
},
},
{
name: "LoweredAtomicCas32Variant",
argLen: 4,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
{2, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
{0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB
},
outputs: []outputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
},
},
{
name: "LoweredAtomicAnd32",
argLen: 3,

View File

@ -66,9 +66,14 @@ func rewriteValueLOONG64(v *Value) bool {
return rewriteValueLOONG64_OpAtomicAnd8(v)
case OpAtomicCompareAndSwap32:
return rewriteValueLOONG64_OpAtomicCompareAndSwap32(v)
case OpAtomicCompareAndSwap32Variant:
return rewriteValueLOONG64_OpAtomicCompareAndSwap32Variant(v)
case OpAtomicCompareAndSwap64:
v.Op = OpLOONG64LoweredAtomicCas64
return true
case OpAtomicCompareAndSwap64Variant:
v.Op = OpLOONG64LoweredAtomicCas64Variant
return true
case OpAtomicExchange32:
v.Op = OpLOONG64LoweredAtomicExchange32
return true
@ -915,6 +920,27 @@ func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool {
return true
}
}
func rewriteValueLOONG64_OpAtomicCompareAndSwap32Variant(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicCompareAndSwap32Variant ptr old new mem)
// result: (LoweredAtomicCas32Variant ptr (SignExt32to64 old) new mem)
for {
ptr := v_0
old := v_1
new := v_2
mem := v_3
v.reset(OpLOONG64LoweredAtomicCas32Variant)
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64)
v0.AddArg(old)
v.AddArg4(ptr, v0, new, mem)
return true
}
}
func rewriteValueLOONG64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]

View File

@ -298,7 +298,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
},
sys.PPC64)
makeAtomicGuardedIntrinsicLoong64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
makeAtomicStoreGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
// Target Atomic feature is identified by dynamic detection
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
@ -315,29 +315,21 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
// We have atomic instructions - use it directly.
s.startBlock(bTrue)
emit(s, n, args, op1, typ, needReturn)
emit(s, n, args, op1, typ, false)
s.endBlock().AddEdgeTo(bEnd)
// Use original instruction sequence.
s.startBlock(bFalse)
emit(s, n, args, op0, typ, needReturn)
emit(s, n, args, op0, typ, false)
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
if needReturn {
return s.variable(n, types.Types[typ])
} else {
return nil
}
return nil
}
}
makeAtomicStoreGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
return makeAtomicGuardedIntrinsicLoong64common(op0, op1, typ, emit, false)
}
atomicStoreEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
@ -475,14 +467,14 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
},
sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("internal/runtime/atomic", "Cas64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
},
sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("internal/runtime/atomic", "CasRel",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
@ -506,6 +498,53 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64),
sys.ARM64)
atomicCasEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
if needReturn {
s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
}
}
makeAtomicCasGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, emit atomicOpEmitter) intrinsicBuilder {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
// Target Atomic feature is identified by dynamic detection
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAMCAS, s.sb)
v := s.load(types.Types[types.TBOOL], addr)
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
b.Likely = ssa.BranchLikely
// We have atomic instructions - use it directly.
s.startBlock(bTrue)
emit(s, n, args, op1, types.TBOOL, true)
s.endBlock().AddEdgeTo(bEnd)
// Use original instruction sequence.
s.startBlock(bFalse)
emit(s, n, args, op0, types.TBOOL, true)
s.endBlock().AddEdgeTo(bEnd)
// Merge results.
s.startBlock(bEnd)
return s.variable(n, types.Types[types.TBOOL])
}
}
addF("internal/runtime/atomic", "Cas",
makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, atomicCasEmitterLoong64),
sys.Loong64)
addF("internal/runtime/atomic", "Cas64",
makeAtomicCasGuardedIntrinsicLoong64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, atomicCasEmitterLoong64),
sys.Loong64)
// Old-style atomic logical operation API (all supported archs except arm64).
addF("internal/runtime/atomic", "And8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {

View File

@ -150,6 +150,7 @@ func InitConfig() {
ir.Syms.X86HasFMA = typecheck.LookupRuntimeVar("x86HasFMA") // bool
ir.Syms.ARMHasVFPv4 = typecheck.LookupRuntimeVar("armHasVFPv4") // bool
ir.Syms.ARM64HasATOMICS = typecheck.LookupRuntimeVar("arm64HasATOMICS") // bool
ir.Syms.Loong64HasLAMCAS = typecheck.LookupRuntimeVar("loong64HasLAMCAS") // bool
ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool
ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX") // bool
ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s")

View File

@ -289,6 +289,7 @@ var x86HasSSE41 bool
var x86HasFMA bool
var armHasVFPv4 bool
var arm64HasATOMICS bool
var loong64HasLAMCAS bool
var loong64HasLAM_BH bool
var loong64HasLSX bool

View File

@ -237,6 +237,7 @@ var runtimeDecls = [...]struct {
{"x86HasFMA", varTag, 6},
{"armHasVFPv4", varTag, 6},
{"arm64HasATOMICS", varTag, 6},
{"loong64HasLAMCAS", varTag, 6},
{"loong64HasLAM_BH", varTag, 6},
{"loong64HasLSX", varTag, 6},
{"asanregisterglobals", funcTag, 130},

View File

@ -216,6 +216,7 @@ var builtins = [...]struct {
{"runtime.x86HasFMA", 0},
{"runtime.armHasVFPv4", 0},
{"runtime.arm64HasATOMICS", 0},
{"runtime.loong64HasLAMCAS", 0},
{"runtime.loong64HasLAM_BH", 0},
{"runtime.loong64HasLSX", 0},
{"runtime.asanregisterglobals", 1},

View File

@ -12,6 +12,7 @@ import (
)
const (
offsetLOONG64HasLAMCAS = unsafe.Offsetof(cpu.Loong64.HasLAMCAS)
offsetLoong64HasLAM_BH = unsafe.Offsetof(cpu.Loong64.HasLAM_BH)
)

View File

@ -16,18 +16,32 @@ TEXT ·Cas(SB), NOSPLIT, $0-17
MOVV ptr+0(FP), R4
MOVW old+8(FP), R5
MOVW new+12(FP), R6
DBAR
MOVBU internalcpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8
BEQ R8, cas_again
MOVV R5, R7 // backup old value
AMCASDBW R6, (R4), R5
BNE R7, R5, cas_fail0
MOVV $1, R4
MOVB R4, ret+16(FP)
RET
cas_fail0:
MOVB R0, ret+16(FP)
RET
// Implemented using the ll-sc instruction pair
DBAR $0x14 // LoadAcquire barrier
cas_again:
MOVV R6, R7
LL (R4), R8
BNE R5, R8, cas_fail
BNE R5, R8, cas_fail1
SC R7, (R4)
BEQ R7, cas_again
MOVV $1, R4
MOVB R4, ret+16(FP)
DBAR
DBAR $0x12 // StoreRelease barrier
RET
cas_fail:
cas_fail1:
MOVV $0, R4
JMP -4(PC)
@ -43,21 +57,41 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25
MOVV ptr+0(FP), R4
MOVV old+8(FP), R5
MOVV new+16(FP), R6
DBAR
MOVBU internalcpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8
BEQ R8, cas64_again
MOVV R5, R7 // backup old value
AMCASDBV R6, (R4), R5
BNE R7, R5, cas64_fail0
MOVV $1, R4
MOVB R4, ret+24(FP)
RET
cas64_fail0:
MOVB R0, ret+24(FP)
RET
// Implemented using the ll-sc instruction pair
DBAR $0x14
cas64_again:
MOVV R6, R7
LLV (R4), R8
BNE R5, R8, cas64_fail
BNE R5, R8, cas64_fail1
SCV R7, (R4)
BEQ R7, cas64_again
MOVV $1, R4
MOVB R4, ret+24(FP)
DBAR
DBAR $0x12
RET
cas64_fail:
cas64_fail1:
MOVV $0, R4
JMP -4(PC)
TEXT ·Casint32(SB),NOSPLIT,$0-17
JMP ·Cas(SB)
TEXT ·Casint64(SB),NOSPLIT,$0-25
JMP ·Cas64(SB)
TEXT ·Casuintptr(SB), NOSPLIT, $0-25
JMP ·Cas64(SB)

View File

@ -34,6 +34,7 @@ var (
arm64HasATOMICS bool
loong64HasLAMCAS bool
loong64HasLAM_BH bool
loong64HasLSX bool
)

View File

@ -752,6 +752,7 @@ func cpuinit(env string) {
arm64HasATOMICS = cpu.ARM64.HasATOMICS
case "loong64":
loong64HasLAMCAS = cpu.Loong64.HasLAMCAS
loong64HasLAM_BH = cpu.Loong64.HasLAM_BH
loong64HasLSX = cpu.Loong64.HasLSX
}