runtime: remove the meaningless offset of 8 for duffzero on loong64

Currently callers subtract 8 from the start address before calling
duffzero, only for the duffzero implementation to add 8 back in every
store. The two adjustments cancel each other out, so remove both.

Change-Id: I7e451d04d7e98ccafe711645d81d3aadf376766f
Reviewed-on: https://go-review.googlesource.com/c/go/+/487295
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: WANG Xuerui <git@xen0n.name>
Run-TryBot: WANG Xuerui <git@xen0n.name>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: xiaodong liu <teaofmoli@gmail.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Ian Lance Taylor <iant@golang.org>
This commit is contained in:
Guoqi Chen 2023-04-21 11:08:09 +08:00 committed by Gopher Robot
parent e3ef8d1810
commit 06f420fc19
6 changed files with 151 additions and 149 deletions

View File

@ -5,6 +5,7 @@
package loong64 package loong64
import ( import (
"cmd/compile/internal/base"
"cmd/compile/internal/ir" "cmd/compile/internal/ir"
"cmd/compile/internal/objw" "cmd/compile/internal/objw"
"cmd/compile/internal/types" "cmd/compile/internal/types"
@ -16,34 +17,38 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
if cnt == 0 { if cnt == 0 {
return p return p
} }
// Adjust the frame to account for LR.
off += base.Ctxt.Arch.FixedFrameSize
if cnt < int64(4*types.PtrSize) { if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) { for i := int64(0); i < cnt; i += int64(types.PtrSize) {
p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, 8+off+i) p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, off+i)
} }
} else if cnt <= int64(128*types.PtrSize) { } else if cnt <= int64(128*types.PtrSize) {
p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0)
p.Reg = loong64.REGSP p.Reg = loong64.REGSP
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero p.To.Sym = ir.Syms.Duffzero
p.To.Offset = 8 * (128 - cnt/int64(types.PtrSize)) p.To.Offset = 8 * (128 - cnt/int64(types.PtrSize))
} else { } else {
// ADDV $(8+frame+lo-8), SP, r1 // ADDV $(off), SP, r1
// ADDV $cnt, r1, r2 // ADDV $cnt, r1, r2
// loop: // loop:
// MOVV R0, (Widthptr)r1 // MOVV R0, (r1)
// ADDV $Widthptr, r1 // ADDV $Widthptr, r1
// BNE r1, r2, loop // BNE r1, r2, loop
p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0)
p.Reg = loong64.REGSP p.Reg = loong64.REGSP
p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, loong64.REGRT2, 0) p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, loong64.REGRT2, 0)
p.Reg = loong64.REGRT1 p.Reg = loong64.REGRT1
p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, int64(types.PtrSize)) p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, 0)
p1 := p loop := p
p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, int64(types.PtrSize), obj.TYPE_REG, loong64.REGRT1, 0) p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, int64(types.PtrSize), obj.TYPE_REG, loong64.REGRT1, 0)
p = pp.Append(p, loong64.ABNE, obj.TYPE_REG, loong64.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) p = pp.Append(p, loong64.ABNE, obj.TYPE_REG, loong64.REGRT1, 0, obj.TYPE_BRANCH, 0, 0)
p.Reg = loong64.REGRT2 p.Reg = loong64.REGRT2
p.To.SetTarget(p1) p.To.SetTarget(loop)
} }
return p return p

View File

@ -340,18 +340,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpLOONG64DUFFZERO: case ssa.OpLOONG64DUFFZERO:
// runtime.duffzero expects start address - 8 in R19 // runtime.duffzero expects start address in R19
p := s.Prog(loong64.ASUBVU) p := s.Prog(obj.ADUFFZERO)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 8
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_R19
p = s.Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero p.To.Sym = ir.Syms.Duffzero
p.To.Offset = v.AuxInt p.To.Offset = v.AuxInt
case ssa.OpLOONG64LoweredZero: case ssa.OpLOONG64LoweredZero:
// SUBV $8, R19 // SUBV $8, R19
// MOVV R0, 8(R19) // MOVV R0, 8(R19)

View File

@ -289,9 +289,10 @@ func init() {
aux: "Int64", aux: "Int64",
argLength: 2, argLength: 2,
reg: regInfo{ reg: regInfo{
inputs: []regMask{gp}, inputs: []regMask{buildReg("R19")},
clobbers: buildReg("R19 R1"), clobbers: buildReg("R19 R1"),
}, },
typ: "Mem",
faultOnNilArg0: true, faultOnNilArg0: true,
}, },
@ -309,6 +310,7 @@ func init() {
inputs: []regMask{buildReg("R20"), buildReg("R19")}, inputs: []regMask{buildReg("R20"), buildReg("R19")},
clobbers: buildReg("R19 R20 R1"), clobbers: buildReg("R19 R20 R1"),
}, },
typ: "Mem",
faultOnNilArg0: true, faultOnNilArg0: true,
faultOnNilArg1: true, faultOnNilArg1: true,
}, },

View File

@ -24540,7 +24540,7 @@ var opcodeTable = [...]opInfo{
faultOnNilArg0: true, faultOnNilArg0: true,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 {0, 262144}, // R19
}, },
clobbers: 262146, // R1 R19 clobbers: 262146, // R1 R19
}, },

View File

@ -5,261 +5,261 @@
#include "textflag.h" #include "textflag.h"
TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
MOVV R0, 8(R19) MOVV R0, (R19)
ADDV $8, R19 ADDV $8, R19
RET RET

View File

@ -179,11 +179,11 @@ func copyARM64(w io.Writer) {
func zeroLOONG64(w io.Writer) { func zeroLOONG64(w io.Writer) {
// R0: always zero // R0: always zero
// R19 (aka REGRT1): ptr to memory to be zeroed - 8 // R19 (aka REGRT1): ptr to memory to be zeroed
// On return, R19 points to the last zeroed dword. // On return, R19 points just past the last zeroed dword.
fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
for i := 0; i < 128; i++ { for i := 0; i < 128; i++ {
fmt.Fprintln(w, "\tMOVV\tR0, 8(R19)") fmt.Fprintln(w, "\tMOVV\tR0, (R19)")
fmt.Fprintln(w, "\tADDV\t$8, R19") fmt.Fprintln(w, "\tADDV\t$8, R19")
} }
fmt.Fprintln(w, "\tRET") fmt.Fprintln(w, "\tRET")