mirror of https://github.com/golang/go.git
cmd/internal/obj/arm64: encode large constants into MOVZ/MOVN and MOVK instructions
The current assembler loads large constants from the constant pool; this CL
gets rid of the pool by using MOVZ/MOVN and MOVK to load large
constants.
This CL changes the assembler behavior as follows.
1. go assembly 1, MOVD $0x1111222233334444, R1
2, MOVD $0x1111ffff1111ffff, R1
previous version: MOVD 0x9a4, R1 (loads constant from pool).
optimized version: 1, MOVD $0x4444, R1; MOVK $(0x3333<<16), R1; MOVK $(0x2222<<32), R1;
MOVK $(0x1111<<48), R1. 2, MOVN $(0xeeee<<16), R1; MOVK $(0x1111<<48), R1.
Add test cases. Binary size comparisons and benchmark results are below.
1. Binary size before/after
binary size change
pkg/linux_arm64 +25.4KB
pkg/tool/linux_arm64 -2.9KB
go -2KB
gofmt no change
2. compiler benchmark.
name old time/op new time/op delta
Template 574ms ±21% 577ms ±14% ~ (p=0.853 n=10+10)
Unicode 327ms ±29% 353ms ±23% ~ (p=0.360 n=10+8)
GoTypes 1.97s ± 8% 2.04s ±11% ~ (p=0.143 n=10+10)
Compiler 9.13s ± 9% 9.25s ± 8% ~ (p=0.684 n=10+10)
SSA 29.2s ± 5% 27.0s ± 4% -7.40% (p=0.000 n=10+10)
Flate 402ms ±40% 308ms ± 6% -23.29% (p=0.004 n=10+10)
GoParser 470ms ±26% 382ms ±10% -18.82% (p=0.000 n=9+10)
Reflect 1.36s ±16% 1.17s ± 7% -13.92% (p=0.001 n=9+10)
Tar 561ms ±19% 466ms ±15% -17.08% (p=0.000 n=9+10)
XML 745ms ±20% 679ms ±20% ~ (p=0.123 n=10+10)
StdCmd 35.5s ± 6% 37.2s ± 3% +4.81% (p=0.001 n=9+8)
name old user-time/op new user-time/op delta
Template 625ms ±14% 660ms ±18% ~ (p=0.343 n=10+10)
Unicode 355ms ±10% 373ms ±20% ~ (p=0.346 n=9+10)
GoTypes 2.39s ± 8% 2.37s ± 5% ~ (p=0.897 n=10+10)
Compiler 11.1s ± 4% 11.4s ± 2% +2.63% (p=0.010 n=10+9)
SSA 35.4s ± 3% 34.9s ± 2% ~ (p=0.113 n=10+9)
Flate 402ms ±13% 371ms ±30% ~ (p=0.089 n=10+9)
GoParser 513ms ± 8% 489ms ±24% -4.76% (p=0.039 n=9+9)
Reflect 1.52s ±12% 1.41s ± 5% -7.32% (p=0.001 n=9+10)
Tar 607ms ±10% 558ms ± 8% -7.96% (p=0.009 n=9+10)
XML 828ms ±10% 789ms ±12% ~ (p=0.059 n=10+10)
name old text-bytes new text-bytes delta
HelloSize 714kB ± 0% 712kB ± 0% -0.23% (p=0.000 n=10+10)
CmdGoSize 8.26MB ± 0% 8.25MB ± 0% -0.14% (p=0.000 n=10+10)
name old data-bytes new data-bytes delta
HelloSize 10.5kB ± 0% 10.5kB ± 0% ~ (all equal)
CmdGoSize 258kB ± 0% 258kB ± 0% ~ (all equal)
name old bss-bytes new bss-bytes delta
HelloSize 125kB ± 0% 125kB ± 0% ~ (all equal)
CmdGoSize 146kB ± 0% 146kB ± 0% ~ (all equal)
name old exe-bytes new exe-bytes delta
HelloSize 1.18MB ± 0% 1.18MB ± 0% ~ (all equal)
CmdGoSize 11.2MB ± 0% 11.2MB ± 0% -0.13% (p=0.000 n=10+10)
3. go1 benchmark.
name old time/op new time/op delta
BinaryTree17 6.60s ±18% 7.36s ±22% ~ (p=0.222 n=5+5)
Fannkuch11 4.04s ± 0% 4.05s ± 0% ~ (p=0.421 n=5+5)
FmtFprintfEmpty 91.8ns ±14% 91.2ns ± 9% ~ (p=0.667 n=5+5)
FmtFprintfString 145ns ± 0% 151ns ± 6% ~ (p=0.397 n=4+5)
FmtFprintfInt 169ns ± 0% 176ns ± 5% +4.14% (p=0.016 n=4+5)
FmtFprintfIntInt 229ns ± 2% 243ns ± 6% ~ (p=0.143 n=5+5)
FmtFprintfPrefixedInt 343ns ± 0% 350ns ± 3% +1.92% (p=0.048 n=5+5)
FmtFprintfFloat 400ns ± 3% 394ns ± 3% ~ (p=0.063 n=5+5)
FmtManyArgs 1.04µs ± 0% 1.05µs ± 0% +1.62% (p=0.029 n=4+4)
GobDecode 13.9ms ± 4% 13.9ms ± 5% ~ (p=1.000 n=5+5)
GobEncode 10.6ms ± 4% 10.6ms ± 5% ~ (p=0.421 n=5+5)
Gzip 567ms ± 1% 563ms ± 4% ~ (p=0.548 n=5+5)
Gunzip 60.2ms ± 1% 60.4ms ± 0% ~ (p=0.056 n=5+5)
HTTPClientServer 114µs ± 4% 108µs ± 7% ~ (p=0.095 n=5+5)
JSONEncode 18.4ms ± 2% 17.8ms ± 2% -3.06% (p=0.016 n=5+5)
JSONDecode 105ms ± 1% 103ms ± 2% ~ (p=0.056 n=5+5)
Mandelbrot200 5.48ms ± 0% 5.49ms ± 0% ~ (p=0.841 n=5+5)
GoParse 6.05ms ± 1% 6.05ms ± 2% ~ (p=1.000 n=5+5)
RegexpMatchEasy0_32 143ns ± 1% 146ns ± 4% +2.10% (p=0.048 n=4+5)
RegexpMatchEasy0_1K 499ns ± 1% 492ns ± 2% ~ (p=0.079 n=5+5)
RegexpMatchEasy1_32 137ns ± 0% 136ns ± 1% -0.73% (p=0.016 n=4+5)
RegexpMatchEasy1_1K 826ns ± 4% 823ns ± 2% ~ (p=0.841 n=5+5)
RegexpMatchMedium_32 224ns ± 5% 233ns ± 8% ~ (p=0.119 n=5+5)
RegexpMatchMedium_1K 59.6µs ± 0% 59.3µs ± 1% -0.66% (p=0.016 n=4+5)
RegexpMatchHard_32 3.29µs ± 3% 3.26µs ± 1% ~ (p=0.889 n=5+5)
RegexpMatchHard_1K 98.8µs ± 2% 99.0µs ± 0% ~ (p=0.690 n=5+5)
Revcomp 1.02s ± 1% 1.01s ± 1% ~ (p=0.095 n=5+5)
Template 135ms ± 5% 131ms ± 1% ~ (p=0.151 n=5+5)
TimeParse 591ns ± 0% 593ns ± 0% +0.20% (p=0.048 n=5+5)
TimeFormat 655ns ± 2% 607ns ± 0% -7.42% (p=0.016 n=5+4)
[Geo mean] 93.5µs 93.8µs +0.23%
name old speed new speed delta
GobDecode 55.1MB/s ± 4% 55.1MB/s ± 4% ~ (p=1.000 n=5+5)
GobEncode 72.4MB/s ± 4% 72.3MB/s ± 5% ~ (p=0.421 n=5+5)
Gzip 34.2MB/s ± 1% 34.5MB/s ± 4% ~ (p=0.548 n=5+5)
Gunzip 322MB/s ± 1% 321MB/s ± 0% ~ (p=0.056 n=5+5)
JSONEncode 106MB/s ± 2% 109MB/s ± 2% +3.16% (p=0.016 n=5+5)
JSONDecode 18.5MB/s ± 1% 18.8MB/s ± 2% ~ (p=0.056 n=5+5)
GoParse 9.57MB/s ± 1% 9.57MB/s ± 2% ~ (p=0.952 n=5+5)
RegexpMatchEasy0_32 223MB/s ± 1% 221MB/s ± 0% -1.10% (p=0.029 n=4+4)
RegexpMatchEasy0_1K 2.05GB/s ± 1% 2.08GB/s ± 2% ~ (p=0.095 n=5+5)
RegexpMatchEasy1_32 232MB/s ± 0% 234MB/s ± 1% +0.76% (p=0.016 n=4+5)
RegexpMatchEasy1_1K 1.24GB/s ± 4% 1.24GB/s ± 2% ~ (p=0.841 n=5+5)
RegexpMatchMedium_32 4.45MB/s ± 5% 4.20MB/s ± 1% -5.63% (p=0.000 n=5+4)
RegexpMatchMedium_1K 17.2MB/s ± 0% 17.3MB/s ± 1% +0.66% (p=0.016 n=4+5)
RegexpMatchHard_32 9.73MB/s ± 3% 9.83MB/s ± 1% ~ (p=0.889 n=5+5)
RegexpMatchHard_1K 10.4MB/s ± 2% 10.3MB/s ± 0% ~ (p=0.635 n=5+5)
Revcomp 249MB/s ± 1% 252MB/s ± 1% ~ (p=0.095 n=5+5)
Template 14.4MB/s ± 4% 14.8MB/s ± 1% ~ (p=0.151 n=5+5)
[Geo mean] 62.1MB/s 62.3MB/s +0.34%
Fixes #10108
Change-Id: I79038f3c4c2ff874c136053d1a2b1c8a5a9cfac5
Reviewed-on: https://go-review.googlesource.com/c/118796
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
ac277d9234
commit
644ddaa842
|
|
@ -195,6 +195,11 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
CMPW $27745, R2 // 3b8c8d525f001b6b
|
||||
CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
|
||||
CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
|
||||
CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb
|
||||
CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb
|
||||
ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091
|
||||
SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb
|
||||
CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb
|
||||
ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
|
||||
// LTYPE1 imsr ',' spreg ','
|
||||
// {
|
||||
|
|
@ -240,12 +245,21 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
|
||||
EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2
|
||||
TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72
|
||||
TST $0x4900000049, R0 // TST $313532612681, R0 // 3b0980d23b09c0f21f001bea
|
||||
ORR $0x170000, R2, R1 // ORR $1507328, R2, R1 // fb02a0d241001baa
|
||||
AND $0xff00ff, R2 // AND $16711935, R2 // fb1f80d2fb1fa0f242001b8a
|
||||
AND $0xff00ffff, R1 // AND $4278255615, R1 // fbff9fd21be0bff221001b8a
|
||||
ANDS $0xffff, R2 // ANDS $65535, R2 // 423c40f2
|
||||
AND $0x7fffffff, R3 // AND $2147483647, R3 // 63784092
|
||||
ANDS $0x0ffffffff80000000, R2 // ANDS $-2147483648, R2 // 428061f2
|
||||
AND $0xfffff, R2 // AND $1048575, R2 // 424c4092
|
||||
ANDW $0xf00fffff, R1 // ANDW $4027580415, R1 // 215c0412
|
||||
ANDSW $0xff00ffff, R1 // ANDSW $4278255615, R1 // 215c0872
|
||||
TST $0x11223344, R2 // TST $287454020, R2 // 9b6886d25b24a2f25f001bea
|
||||
TSTW $0xa000, R3 // TSTW $40960, R3 // 1b0094527f001b6a
|
||||
BICW $0xa000, R3 // BICW $40960, R3 // 1b00945263003b0a
|
||||
ORRW $0x1b000, R2, R3 // ORRW $110592, R2, R3 // 1b0096523b00a07243001b2a
|
||||
TSTW $0x500000, R1 // TSTW $5242880, R1 // 1b0aa0523f001b6a
|
||||
TSTW $0xff00ff, R1 // TSTW $16711935, R1 // 3f9c0072
|
||||
|
||||
AND $8, R0, RSP // 1f007d92
|
||||
|
|
@ -256,13 +270,20 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
|
|||
EON $8, R0, RSP // 1ff87cd2
|
||||
|
||||
MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2
|
||||
MOVW $1000000, R4 // 04488852e401a072
|
||||
MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552
|
||||
MOVW $0xaaaaffff, R1 // MOVW $2863333375, R1 // a1aaaa12
|
||||
MOVW $0xaaaa, R1 // MOVW $43690, R1 // 41559552
|
||||
MOVW $0xffffaaaa, R1 // MOVW $4294945450, R1 // a1aa8a12
|
||||
MOVW $0xffff0000, R1 // MOVW $4294901760, R1 // e1ffbf52
|
||||
MOVD $0xffff00000000000, R1 // MOVD $1152903912420802560, R1 // e13f54b2
|
||||
MOVD $0x1111000000001111, R1 // MOVD $1229764173248860433, R1 // 212282d22122e2f2
|
||||
MOVD $0x1111ffff1111ffff, R1 // MOVD $1230045644216991743, R1 // c1ddbd922122e2f2
|
||||
MOVD $0x1111222233334444, R1 // MOVD $1229801703532086340, R1 // 818888d26166a6f24144c4f22122e2f2
|
||||
MOVD $0xaaaaffff, R1 // MOVD $2863333375, R1 // e1ff9fd24155b5f2
|
||||
MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
|
||||
MOVD $0xaaaa0000aaaa1111, R1 // MOVD $-6149102338357718767, R1 // 212282d24155b5f24155f5f2
|
||||
MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2
|
||||
MOVD $0, R1 // 010080d2
|
||||
MOVD $-1, R1 // 01008092
|
||||
MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2
|
||||
|
|
|
|||
|
|
@ -414,6 +414,8 @@ const (
|
|||
C_BITCON // bitfield and logical immediate masks
|
||||
C_ADDCON2 // 24-bit constant
|
||||
C_LCON // 32-bit constant
|
||||
C_MOVCON2 // a constant that can be loaded with one MOVZ/MOVN and one MOVK
|
||||
C_MOVCON3 // a constant that can be loaded with one MOVZ/MOVN and two MOVKs
|
||||
C_VCON // 64-bit constant
|
||||
C_FCON // floating-point constant
|
||||
C_VCONADDR // 64-bit memory address
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ var cnames7 = []string{
|
|||
"BITCON",
|
||||
"ADDCON2",
|
||||
"LCON",
|
||||
"MOVCON2",
|
||||
"MOVCON3",
|
||||
"VCON",
|
||||
"FCON",
|
||||
"VCONADDR",
|
||||
|
|
|
|||
|
|
@ -198,9 +198,15 @@ var optab = []Optab{
|
|||
{ACMP, C_BITCON, C_RSP, C_NONE, C_NONE, 62, 8, 0, 0, 0},
|
||||
{AADD, C_ADDCON2, C_RSP, C_NONE, C_RSP, 48, 8, 0, 0, 0},
|
||||
{AADD, C_ADDCON2, C_NONE, C_NONE, C_RSP, 48, 8, 0, 0, 0},
|
||||
{AADD, C_VCON, C_RSP, C_NONE, C_RSP, 13, 8, 0, LFROM, 0},
|
||||
{AADD, C_VCON, C_NONE, C_NONE, C_RSP, 13, 8, 0, LFROM, 0},
|
||||
{ACMP, C_VCON, C_REG, C_NONE, C_NONE, 13, 8, 0, LFROM, 0},
|
||||
{AADD, C_MOVCON2, C_RSP, C_NONE, C_RSP, 13, 12, 0, 0, 0},
|
||||
{AADD, C_MOVCON2, C_NONE, C_NONE, C_RSP, 13, 12, 0, 0, 0},
|
||||
{AADD, C_MOVCON3, C_RSP, C_NONE, C_RSP, 13, 16, 0, 0, 0},
|
||||
{AADD, C_MOVCON3, C_NONE, C_NONE, C_RSP, 13, 16, 0, 0, 0},
|
||||
{AADD, C_VCON, C_RSP, C_NONE, C_RSP, 13, 20, 0, 0, 0},
|
||||
{AADD, C_VCON, C_NONE, C_NONE, C_RSP, 13, 20, 0, 0, 0},
|
||||
{ACMP, C_MOVCON2, C_REG, C_NONE, C_NONE, 13, 12, 0, 0, 0},
|
||||
{ACMP, C_MOVCON3, C_REG, C_NONE, C_NONE, 13, 16, 0, 0, 0},
|
||||
{ACMP, C_VCON, C_REG, C_NONE, C_NONE, 13, 20, 0, 0, 0},
|
||||
{AADD, C_SHIFT, C_REG, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
{AADD, C_SHIFT, C_NONE, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
{AMVN, C_SHIFT, C_NONE, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
|
|
@ -255,11 +261,21 @@ var optab = []Optab{
|
|||
{AANDS, C_MOVCON, C_REG, C_NONE, C_REG, 62, 8, 0, 0, 0},
|
||||
{AANDS, C_MOVCON, C_NONE, C_NONE, C_REG, 62, 8, 0, 0, 0},
|
||||
{ATST, C_MOVCON, C_REG, C_NONE, C_NONE, 62, 8, 0, 0, 0},
|
||||
{AAND, C_VCON, C_REG, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
|
||||
{AAND, C_VCON, C_NONE, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
|
||||
{AANDS, C_VCON, C_REG, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
|
||||
{AANDS, C_VCON, C_NONE, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
|
||||
{ATST, C_VCON, C_REG, C_NONE, C_NONE, 28, 8, 0, LFROM, 0},
|
||||
{AAND, C_MOVCON2, C_REG, C_NONE, C_REG, 28, 12, 0, 0, 0},
|
||||
{AAND, C_MOVCON2, C_NONE, C_NONE, C_REG, 28, 12, 0, 0, 0},
|
||||
{AAND, C_MOVCON3, C_REG, C_NONE, C_REG, 28, 16, 0, 0, 0},
|
||||
{AAND, C_MOVCON3, C_NONE, C_NONE, C_REG, 28, 16, 0, 0, 0},
|
||||
{AAND, C_VCON, C_REG, C_NONE, C_REG, 28, 20, 0, 0, 0},
|
||||
{AAND, C_VCON, C_NONE, C_NONE, C_REG, 28, 20, 0, 0, 0},
|
||||
{AANDS, C_MOVCON2, C_REG, C_NONE, C_REG, 28, 12, 0, 0, 0},
|
||||
{AANDS, C_MOVCON2, C_NONE, C_NONE, C_REG, 28, 12, 0, 0, 0},
|
||||
{AANDS, C_MOVCON3, C_REG, C_NONE, C_REG, 28, 16, 0, 0, 0},
|
||||
{AANDS, C_MOVCON3, C_NONE, C_NONE, C_REG, 28, 16, 0, 0, 0},
|
||||
{AANDS, C_VCON, C_REG, C_NONE, C_REG, 28, 20, 0, 0, 0},
|
||||
{AANDS, C_VCON, C_NONE, C_NONE, C_REG, 28, 20, 0, 0, 0},
|
||||
{ATST, C_MOVCON2, C_REG, C_NONE, C_NONE, 28, 12, 0, 0, 0},
|
||||
{ATST, C_MOVCON3, C_REG, C_NONE, C_NONE, 28, 16, 0, 0, 0},
|
||||
{ATST, C_VCON, C_REG, C_NONE, C_NONE, 28, 20, 0, 0, 0},
|
||||
{AAND, C_SHIFT, C_REG, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
{AAND, C_SHIFT, C_NONE, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
{AANDS, C_SHIFT, C_REG, C_NONE, C_REG, 3, 4, 0, 0, 0},
|
||||
|
|
@ -278,8 +294,10 @@ var optab = []Optab{
|
|||
{AMOVD, C_MOVCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
|
||||
{AMOVW, C_BITCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
|
||||
{AMOVD, C_BITCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
|
||||
{AMOVW, C_LCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
|
||||
{AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
|
||||
{AMOVW, C_MOVCON2, C_NONE, C_NONE, C_REG, 12, 8, 0, 0, 0},
|
||||
{AMOVD, C_MOVCON2, C_NONE, C_NONE, C_REG, 12, 8, 0, 0, 0},
|
||||
{AMOVD, C_MOVCON3, C_NONE, C_NONE, C_REG, 12, 12, 0, 0, 0},
|
||||
{AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 16, 0, 0, 0},
|
||||
|
||||
{AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0},
|
||||
{AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0},
|
||||
|
|
@ -401,8 +419,8 @@ var optab = []Optab{
|
|||
{AMOVH, C_REG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0},
|
||||
{AMOVW, C_REG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0},
|
||||
{AMOVW, C_REG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0},
|
||||
{AMOVD, C_REG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0},
|
||||
{AMOVD, C_REG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0},
|
||||
{AMOVD, C_REG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0},
|
||||
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_NSAUTO, 20, 4, REGSP, 0, 0},
|
||||
{AFMOVS, C_FREG, C_NONE, C_NONE, C_NSOREG, 20, 4, 0, 0, 0},
|
||||
|
|
@ -411,15 +429,15 @@ var optab = []Optab{
|
|||
|
||||
/* scaled 12-bit unsigned displacement load */
|
||||
{AMOVB, C_UAUTO4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVB, C_UOREG4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVB, C_UOREG4K, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVBU, C_UAUTO4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVBU, C_UOREG4K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVBU, C_UOREG4K, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVH, C_UAUTO8K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVH, C_UOREG8K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVH, C_UOREG8K, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVW, C_UAUTO16K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVW, C_UOREG16K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVW, C_UOREG16K, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVD, C_UAUTO32K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVD, C_UOREG32K, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVD, C_UOREG32K, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
|
||||
{AFMOVS, C_UAUTO16K, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0},
|
||||
{AFMOVS, C_UOREG16K, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0},
|
||||
|
|
@ -428,15 +446,15 @@ var optab = []Optab{
|
|||
|
||||
/* unscaled 9-bit signed displacement load */
|
||||
{AMOVB, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVB, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVB, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVBU, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVBU, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVBU, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVH, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVH, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVH, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVW, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVW, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVW, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
{AMOVD, C_NSAUTO, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVD, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, REGSP, 0, 0},
|
||||
{AMOVD, C_NSOREG, C_NONE, C_NONE, C_REG, 21, 4, 0, 0, 0},
|
||||
|
||||
{AFMOVS, C_NSAUTO, C_NONE, C_NONE, C_FREG, 21, 4, REGSP, 0, 0},
|
||||
{AFMOVS, C_NSOREG, C_NONE, C_NONE, C_FREG, 21, 4, 0, 0, 0},
|
||||
|
|
@ -1105,6 +1123,15 @@ func isSTXPop(op obj.As) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func isANDop(op obj.As) bool {
|
||||
switch op {
|
||||
case AAND, AORR, AEOR, AANDS, ATST,
|
||||
ABIC, AEON, AORN, ABICS:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isANDWop(op obj.As) bool {
|
||||
switch op {
|
||||
case AANDW, AORRW, AEORW, AANDSW, ATSTW,
|
||||
|
|
@ -1114,6 +1141,14 @@ func isANDWop(op obj.As) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
func isADDop(op obj.As) bool {
|
||||
switch op {
|
||||
case AADD, AADDS, ASUB, ASUBS, ACMN, ACMP:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isADDWop(op obj.As) bool {
|
||||
switch op {
|
||||
case AADDW, AADDSW, ASUBW, ASUBSW, ACMNW, ACMPW:
|
||||
|
|
@ -1445,6 +1480,12 @@ func (c *ctxt7) con32class(a *obj.Addr) int {
|
|||
if isbitcon(uint64(v)) {
|
||||
return C_ABCON
|
||||
}
|
||||
if movcon(int64(v)) >= 0 {
|
||||
return C_AMCON
|
||||
}
|
||||
if movcon(int64(^v)) >= 0 {
|
||||
return C_AMCON
|
||||
}
|
||||
return C_ADDCON
|
||||
}
|
||||
|
||||
|
|
@ -1474,6 +1515,29 @@ func (c *ctxt7) con32class(a *obj.Addr) int {
|
|||
return C_LCON
|
||||
}
|
||||
|
||||
// con64class reclassifies the constant of C_VCON and C_LCON class.
|
||||
func (c *ctxt7) con64class(a *obj.Addr) int {
|
||||
zeroCount := 0
|
||||
negCount := 0
|
||||
for i := uint(0); i < 4; i++ {
|
||||
immh := uint32(a.Offset >> (i * 16) & 0xffff)
|
||||
if immh == 0 {
|
||||
zeroCount++
|
||||
} else if immh == 0xffff {
|
||||
negCount++
|
||||
}
|
||||
}
|
||||
if zeroCount >= 3 || negCount >= 3 {
|
||||
return C_MOVCON
|
||||
} else if zeroCount == 2 || negCount == 2 {
|
||||
return C_MOVCON2
|
||||
} else if zeroCount == 1 || negCount == 1 {
|
||||
return C_MOVCON3
|
||||
} else {
|
||||
return C_VCON
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ctxt7) aclass(a *obj.Addr) int {
|
||||
switch a.Type {
|
||||
case obj.TYPE_NONE:
|
||||
|
|
@ -1698,6 +1762,10 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
|
|||
a1 = c.con32class(&p.From) + 1
|
||||
p.From.Class = int8(a1)
|
||||
}
|
||||
if ((p.As == AMOVD) || isANDop(p.As) || isADDop(p.As)) && (a0 == C_LCON || a0 == C_VCON) {
|
||||
a1 = c.con64class(&p.From) + 1
|
||||
p.From.Class = int8(a1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1793,6 +1861,9 @@ func cmp(a int, b int) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
case C_MOVCON2:
|
||||
return cmp(C_LCON, b)
|
||||
|
||||
case C_VCON:
|
||||
return cmp(C_LCON, b)
|
||||
|
||||
|
|
@ -2711,6 +2782,7 @@ func (c *ctxt7) checkShiftAmount(p *obj.Prog, a *obj.Addr) {
|
|||
}
|
||||
|
||||
func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
||||
var os [5]uint32
|
||||
o1 := uint32(0)
|
||||
o2 := uint32(0)
|
||||
o3 := uint32(0)
|
||||
|
|
@ -2900,13 +2972,29 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
}
|
||||
|
||||
case 12: /* movT $vcon, reg */
|
||||
o1 = c.omovlit(p.As, p, &p.From, int(p.To.Reg))
|
||||
num := c.omovlconst(p.As, p, &p.From, int(p.To.Reg), os[:])
|
||||
if num == 0 {
|
||||
c.ctxt.Diag("invalid constant: %v", p)
|
||||
}
|
||||
o1 = os[0]
|
||||
o2 = os[1]
|
||||
o3 = os[2]
|
||||
o4 = os[3]
|
||||
|
||||
case 13: /* addop $vcon, [R], R (64 bit literal); cmp $lcon,R -> addop $lcon,R, ZR */
|
||||
o1 = c.omovlit(AMOVD, p, &p.From, REGTMP)
|
||||
|
||||
if o1 == 0 {
|
||||
break
|
||||
o := uint32(0)
|
||||
num := uint8(0)
|
||||
cls := oclass(&p.From)
|
||||
if isADDWop(p.As) {
|
||||
if (cls != C_LCON) && (cls != C_ADDCON2) {
|
||||
c.ctxt.Diag("illegal combination: %v", p)
|
||||
}
|
||||
num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:])
|
||||
} else {
|
||||
num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:])
|
||||
}
|
||||
if num == 0 {
|
||||
c.ctxt.Diag("invalid constant: %v", p)
|
||||
}
|
||||
rt := int(p.To.Reg)
|
||||
if p.To.Type == obj.TYPE_NONE {
|
||||
|
|
@ -2917,16 +3005,23 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
r = rt
|
||||
}
|
||||
if p.To.Type != obj.TYPE_NONE && (p.To.Reg == REGSP || r == REGSP) {
|
||||
o2 = c.opxrrr(p, p.As, false)
|
||||
o2 |= REGTMP & 31 << 16
|
||||
o2 |= LSL0_64
|
||||
o = c.opxrrr(p, p.As, false)
|
||||
o |= REGTMP & 31 << 16
|
||||
o |= LSL0_64
|
||||
} else {
|
||||
o2 = c.oprrr(p, p.As)
|
||||
o2 |= REGTMP & 31 << 16 /* shift is 0 */
|
||||
o = c.oprrr(p, p.As)
|
||||
o |= REGTMP & 31 << 16 /* shift is 0 */
|
||||
}
|
||||
|
||||
o2 |= uint32(r&31) << 5
|
||||
o2 |= uint32(rt & 31)
|
||||
o |= uint32(r&31) << 5
|
||||
o |= uint32(rt & 31)
|
||||
|
||||
os[num] = o
|
||||
o1 = os[0]
|
||||
o2 = os[1]
|
||||
o3 = os[2]
|
||||
o4 = os[3]
|
||||
o5 = os[4]
|
||||
|
||||
case 14: /* word */
|
||||
if c.aclass(&p.To) == C_ADDR {
|
||||
|
|
@ -3172,10 +3267,20 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
o1 |= (uint32(r&31) << 5) | uint32(rt&31)
|
||||
|
||||
case 28: /* logop $vcon, [R], R (64 bit literal) */
|
||||
o1 = c.omovlit(AMOVD, p, &p.From, REGTMP)
|
||||
o := uint32(0)
|
||||
num := uint8(0)
|
||||
cls := oclass(&p.From)
|
||||
if isANDWop(p.As) {
|
||||
if (cls != C_LCON) && (cls != C_ADDCON) {
|
||||
c.ctxt.Diag("illegal combination: %v", p)
|
||||
}
|
||||
num = c.omovlconst(AMOVW, p, &p.From, REGTMP, os[:])
|
||||
} else {
|
||||
num = c.omovlconst(AMOVD, p, &p.From, REGTMP, os[:])
|
||||
}
|
||||
|
||||
if o1 == 0 {
|
||||
break
|
||||
if num == 0 {
|
||||
c.ctxt.Diag("invalid constant: %v", p)
|
||||
}
|
||||
rt := int(p.To.Reg)
|
||||
if p.To.Type == obj.TYPE_NONE {
|
||||
|
|
@ -3185,10 +3290,17 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
if r == 0 {
|
||||
r = rt
|
||||
}
|
||||
o2 = c.oprrr(p, p.As)
|
||||
o2 |= REGTMP & 31 << 16 /* shift is 0 */
|
||||
o2 |= uint32(r&31) << 5
|
||||
o2 |= uint32(rt & 31)
|
||||
o = c.oprrr(p, p.As)
|
||||
o |= REGTMP & 31 << 16 /* shift is 0 */
|
||||
o |= uint32(r&31) << 5
|
||||
o |= uint32(rt & 31)
|
||||
|
||||
os[num] = o
|
||||
o1 = os[0]
|
||||
o2 = os[1]
|
||||
o3 = os[2]
|
||||
o4 = os[3]
|
||||
o5 = os[4]
|
||||
|
||||
case 29: /* op Rn, Rd */
|
||||
fc := c.aclass(&p.From)
|
||||
|
|
@ -6319,10 +6431,155 @@ func (c *ctxt7) omovconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int) (o1 uint3
|
|||
}
|
||||
o1 |= MOVCONST(d, s, rt)
|
||||
}
|
||||
|
||||
return o1
|
||||
}
|
||||
|
||||
// load a 32-bit/64-bit large constant (LCON or VCON) in a.Offset into rt
|
||||
// put the instruction sequence in os and return the number of instructions.
|
||||
func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uint32) (num uint8) {
|
||||
switch as {
|
||||
case AMOVW:
|
||||
d := uint32(a.Offset)
|
||||
// use MOVZW and MOVKW to load a constant to rt
|
||||
os[0] = c.opirr(p, AMOVZW)
|
||||
os[0] |= MOVCONST(int64(d), 0, rt)
|
||||
os[1] = c.opirr(p, AMOVKW)
|
||||
os[1] |= MOVCONST(int64(d), 1, rt)
|
||||
return 2
|
||||
|
||||
case AMOVD:
|
||||
d := a.Offset
|
||||
dn := ^d
|
||||
var immh [4]uint64
|
||||
var i int
|
||||
zeroCount := int(0)
|
||||
negCount := int(0)
|
||||
for i = 0; i < 4; i++ {
|
||||
immh[i] = uint64((d >> uint(i*16)) & 0xffff)
|
||||
if immh[i] == 0 {
|
||||
zeroCount++
|
||||
} else if immh[i] == 0xffff {
|
||||
negCount++
|
||||
}
|
||||
}
|
||||
|
||||
if zeroCount == 4 || negCount == 4 {
|
||||
c.ctxt.Diag("the immediate should be MOVCON: %v", p)
|
||||
}
|
||||
switch {
|
||||
case zeroCount == 3:
|
||||
// one MOVZ
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0 {
|
||||
os[0] = c.opirr(p, AMOVZ)
|
||||
os[0] |= MOVCONST(d, i, rt)
|
||||
break
|
||||
}
|
||||
}
|
||||
return 1
|
||||
|
||||
case negCount == 3:
|
||||
// one MOVN
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0xffff {
|
||||
os[0] = c.opirr(p, AMOVN)
|
||||
os[0] |= MOVCONST(dn, i, rt)
|
||||
break
|
||||
}
|
||||
}
|
||||
return 1
|
||||
|
||||
case zeroCount == 2:
|
||||
// one MOVZ and one MOVK
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0 {
|
||||
os[0] = c.opirr(p, AMOVZ)
|
||||
os[0] |= MOVCONST(d, i, rt)
|
||||
i++
|
||||
break
|
||||
}
|
||||
}
|
||||
for ; i < 4; i++ {
|
||||
if immh[i] != 0 {
|
||||
os[1] = c.opirr(p, AMOVK)
|
||||
os[1] |= MOVCONST(d, i, rt)
|
||||
}
|
||||
}
|
||||
return 2
|
||||
|
||||
case negCount == 2:
|
||||
// one MOVN and one MOVK
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0xffff {
|
||||
os[0] = c.opirr(p, AMOVN)
|
||||
os[0] |= MOVCONST(dn, i, rt)
|
||||
i++
|
||||
break
|
||||
}
|
||||
}
|
||||
for ; i < 4; i++ {
|
||||
if immh[i] != 0xffff {
|
||||
os[1] = c.opirr(p, AMOVK)
|
||||
os[1] |= MOVCONST(d, i, rt)
|
||||
}
|
||||
}
|
||||
return 2
|
||||
|
||||
case zeroCount == 1:
|
||||
// one MOVZ and two MOVKs
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0 {
|
||||
os[0] = c.opirr(p, AMOVZ)
|
||||
os[0] |= MOVCONST(d, i, rt)
|
||||
i++
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for j := 1; i < 4; i++ {
|
||||
if immh[i] != 0 {
|
||||
os[j] = c.opirr(p, AMOVK)
|
||||
os[j] |= MOVCONST(d, i, rt)
|
||||
j++
|
||||
}
|
||||
}
|
||||
return 3
|
||||
|
||||
case negCount == 1:
|
||||
// one MOVN and two MOVKs
|
||||
for i = 0; i < 4; i++ {
|
||||
if immh[i] != 0xffff {
|
||||
os[0] = c.opirr(p, AMOVN)
|
||||
os[0] |= MOVCONST(dn, i, rt)
|
||||
i++
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for j := 1; i < 4; i++ {
|
||||
if immh[i] != 0xffff {
|
||||
os[j] = c.opirr(p, AMOVK)
|
||||
os[j] |= MOVCONST(d, i, rt)
|
||||
j++
|
||||
}
|
||||
}
|
||||
return 3
|
||||
|
||||
default:
|
||||
// one MOVZ and 3 MOVKs
|
||||
os[0] = c.opirr(p, AMOVZ)
|
||||
os[0] |= MOVCONST(d, 0, rt)
|
||||
for i = 1; i < 4; i++ {
|
||||
os[i] = c.opirr(p, AMOVK)
|
||||
os[i] |= MOVCONST(d, i, rt)
|
||||
}
|
||||
return 4
|
||||
}
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
func (c *ctxt7) opbfm(p *obj.Prog, a obj.As, r int, s int, rf int, rt int) uint32 {
|
||||
var b uint32
|
||||
o := c.opirr(p, a)
|
||||
|
|
|
|||
Loading…
Reference in New Issue