From 34a9cdef878dc4542586ff412b74c841fee2c5e6 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 12 Jun 2025 16:21:35 +0000 Subject: [PATCH] [dev.simd] cmd/compile: add round simd ops This CL is generated by CL 678195. Change-Id: Ica600229a4e9623fa45f3b5aa370cdd6d9c31686 Reviewed-on: https://go-review.googlesource.com/c/go/+/681295 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 48 + .../compile/internal/ssa/_gen/simdAMD64.rules | 212 + .../compile/internal/ssa/_gen/simdAMD64ops.go | 32 + .../internal/ssa/_gen/simdgenericOps.go | 212 + src/cmd/compile/internal/ssa/opGen.go | 1956 +++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 3596 +++++++++++++++++ .../compile/internal/ssagen/simdintrinsics.go | 212 + src/simd/stubs_amd64.go | 636 +++ 8 files changed, 6904 insertions(+) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 253bec09ca..f5bc26fe74 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -74,6 +74,10 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPADDD512, ssa.OpAMD64VPADDQ512, ssa.OpAMD64VPADDB512, + ssa.OpAMD64VADDSUBPS128, + ssa.OpAMD64VADDSUBPS256, + ssa.OpAMD64VADDSUBPD128, + ssa.OpAMD64VADDSUBPD256, ssa.OpAMD64VANDPS128, ssa.OpAMD64VANDPS256, ssa.OpAMD64VANDPD128, @@ -564,6 +568,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VSQRTPDMasked512: p = simdFp1k1fp1(s, v) + case ssa.OpAMD64VROUNDPS128, + ssa.OpAMD64VROUNDPS256, + ssa.OpAMD64VROUNDPD128, + ssa.OpAMD64VROUNDPD256, + ssa.OpAMD64VRNDSCALEPS512, + ssa.OpAMD64VRNDSCALEPS128, + ssa.OpAMD64VRNDSCALEPS256, + ssa.OpAMD64VRNDSCALEPD128, + ssa.OpAMD64VRNDSCALEPD256, + ssa.OpAMD64VRNDSCALEPD512, + ssa.OpAMD64VREDUCEPS512, + ssa.OpAMD64VREDUCEPS128, + ssa.OpAMD64VREDUCEPS256, + ssa.OpAMD64VREDUCEPD128, + ssa.OpAMD64VREDUCEPD256, + ssa.OpAMD64VREDUCEPD512: + p = simdFp11Imm8(s, v) + + case ssa.OpAMD64VRNDSCALEPSMasked512, + ssa.OpAMD64VRNDSCALEPSMasked128, + ssa.OpAMD64VRNDSCALEPSMasked256, + ssa.OpAMD64VRNDSCALEPDMasked128, + ssa.OpAMD64VRNDSCALEPDMasked256, + ssa.OpAMD64VRNDSCALEPDMasked512, + ssa.OpAMD64VREDUCEPSMasked512, + ssa.OpAMD64VREDUCEPSMasked128, + ssa.OpAMD64VREDUCEPSMasked256, + ssa.OpAMD64VREDUCEPDMasked128, + ssa.OpAMD64VREDUCEPDMasked256, + ssa.OpAMD64VREDUCEPDMasked512: + p = simdFp1k1fp1Imm8(s, v) + case ssa.OpAMD64VCMPPS128, ssa.OpAMD64VCMPPS256, ssa.OpAMD64VCMPPD128, @@ -709,6 +745,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPAVGBMasked128, ssa.OpAMD64VPAVGBMasked256, ssa.OpAMD64VPAVGBMasked512, + ssa.OpAMD64VRNDSCALEPSMasked512, + ssa.OpAMD64VRNDSCALEPSMasked128, + ssa.OpAMD64VRNDSCALEPSMasked256, + ssa.OpAMD64VRNDSCALEPDMasked128, + ssa.OpAMD64VRNDSCALEPDMasked256, + ssa.OpAMD64VRNDSCALEPDMasked512, + ssa.OpAMD64VREDUCEPSMasked512, + ssa.OpAMD64VREDUCEPSMasked128, + ssa.OpAMD64VREDUCEPSMasked256, + ssa.OpAMD64VREDUCEPDMasked128, + ssa.OpAMD64VREDUCEPDMasked256, + ssa.OpAMD64VREDUCEPDMasked512, ssa.OpAMD64VDIVPSMasked512, ssa.OpAMD64VDIVPSMasked128, ssa.OpAMD64VDIVPSMasked256, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index a9daf27548..8bf896afb2 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -42,6 +42,10 @@ (AddUint8x16 ...) => (VPADDB128 ...) (AddUint8x32 ...) => (VPADDB256 ...) (AddUint8x64 ...) 
=> (VPADDB512 ...) +(AddSubFloat32x4 ...) => (VADDSUBPS128 ...) +(AddSubFloat32x8 ...) => (VADDSUBPS256 ...) +(AddSubFloat64x2 ...) => (VADDSUBPD128 ...) +(AddSubFloat64x4 ...) => (VADDSUBPD256 ...) (AndFloat32x16 ...) => (VANDPS512 ...) (AndFloat32x4 ...) => (VANDPS128 ...) (AndFloat32x8 ...) => (VANDPS256 ...) @@ -112,6 +116,70 @@ (AverageUint8x16 ...) => (VPAVGB128 ...) (AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x64 ...) => (VPAVGB512 ...) +(CeilFloat32x4 x) => (VROUNDPS128 [2] x) +(CeilFloat32x8 x) => (VROUNDPS256 [2] x) +(CeilFloat64x2 x) => (VROUNDPD128 [2] x) +(CeilFloat64x4 x) => (VROUNDPD256 [2] x) +(CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+10] x) +(CeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+10] x) +(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x) +(CeilWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x) +(CeilWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x) +(CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x) +(CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x) +(CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+10] x) +(DiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+10] x) +(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) +(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x) +(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x) +(DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x) +(DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x) +(DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+9] x) +(DiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+9] x) +(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) +(DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x) +(DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x) +(DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x) +(DiffWithFloorWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x) +(DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat32x4 
[a] x) => (VREDUCEPS128 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+8] x) +(DiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+8] x) +(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) +(DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) +(DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x) +(DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x) +(DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x) +(DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+11] x) +(DiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+11] x) +(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x) +(DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x) +(DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x) +(DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x) +(DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x) +(DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x) (DivFloat32x16 ...) => (VDIVPS512 ...) (DivFloat32x4 ...) => (VDIVPS128 ...) (DivFloat32x8 ...) => (VDIVPS256 ...) 
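Note on the [a+N] arithmetic in the rules above: it follows the documented VRNDSCALE/VREDUCE imm8 layout, where bits 1:0 select the rounding mode (0 round-to-nearest, 1 floor, 2 ceil, 3 trunc), bit 3 (value 8) sets suppress-all-exceptions, and the high nibble carries the precision (number of fraction bits kept). For the addition to be well-formed, the aux value a is assumed to arrive with the precision already shifted into bits 7:4. A minimal Go sketch of that encoding; the helper and constant names here are illustrative, not part of this CL:

	package main

	import "fmt"

	// Rounding-mode bits in the VRNDSCALE/VREDUCE immediate (imm8[1:0]),
	// matching the offsets used by the rewrite rules above.
	const (
		rcNearest = 0 // Round* rules use [a+0]
		rcFloor   = 1 // Floor* rules use [a+1]
		rcCeil    = 2 // Ceil* rules use [a+2]
		rcTrunc   = 3 // Trunc* rules use [a+3]
		rcSAE     = 8 // *SuppressException* rules add 8 (imm8 bit 3)
	)

	// imm8 mirrors the [a+N] arithmetic: precision occupies the high
	// nibble, rounding mode and SAE flag the low nibble.
	func imm8(precision, mode uint8, sae bool) uint8 {
		v := precision<<4 | mode
		if sae {
			v |= rcSAE
		}
		return v
	}

	func main() {
		// CeilSuppressExceptionWithPrecision with precision 4:
		// a = 4<<4 = 64, and the rule emits [a+10] = 74 = 0x4a.
		fmt.Printf("%#x\n", imm8(4, rcCeil, true))
	}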
@@ -148,6 +216,22 @@ (EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) (EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) (EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y)) +(FloorFloat32x4 x) => (VROUNDPS128 [1] x) +(FloorFloat32x8 x) => (VROUNDPS256 [1] x) +(FloorFloat64x2 x) => (VROUNDPD128 [1] x) +(FloorFloat64x4 x) => (VROUNDPD256 [1] x) +(FloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+9] x) +(FloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+9] x) +(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x) +(FloorWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x) +(FloorWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x) +(FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x) +(FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x) +(FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x) (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y)) (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y) (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y) @@ -370,6 +454,66 @@ (MaskedAverageUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) (MaskedAverageUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) (MaskedAverageUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+10] x (VPMOVVec64x2ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) +(MaskedCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) +(MaskedCeilWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) +(MaskedCeilWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) +(MaskedCeilWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(MaskedCeilWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) +(MaskedCeilWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) +(MaskedCeilWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+10] x (VPMOVVec64x2ToM 
mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x 
(VPMOVVec64x2ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) +(MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (MaskedDivFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedDivFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedDivFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) @@ -406,6 +550,18 @@ (MaskedEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) (MaskedEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) (MaskedEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) +(MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) +(MaskedFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) +(MaskedFloorWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) +(MaskedFloorWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) +(MaskedFloorWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(MaskedFloorWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) +(MaskedFloorWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) +(MaskedFloorWithPrecisionFloat64x8 [a] 
x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (MaskedGreaterFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [6] x y (VPMOVVec32x16ToM mask))) (MaskedGreaterFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [6] x y (VPMOVVec32x4ToM mask))) (MaskedGreaterFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [6] x y (VPMOVVec32x8ToM mask))) @@ -697,6 +853,18 @@ (MaskedPopCountUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) (MaskedPopCountUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) (MaskedPopCountUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask)) +(MaskedRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask)) +(MaskedRoundWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) +(MaskedRoundWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) +(MaskedRoundWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) +(MaskedRoundWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) +(MaskedRoundWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) +(MaskedRoundWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (MaskedSaturatedAddInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) (MaskedSaturatedAddInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) (MaskedSaturatedAddInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) @@ -757,6 +925,18 @@ (MaskedSubUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) (MaskedSubUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) (MaskedSubUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask)) +(MaskedTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask)) +(MaskedTruncWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) +(MaskedTruncWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) +(MaskedTruncWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x 
(VPMOVVec32x8ToM mask)) +(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) +(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) +(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM mask)) (MaskedXorFloat32x4 x y mask) => (VXORPSMasked128 x y (VPMOVVec32x4ToM mask)) (MaskedXorFloat32x8 x y mask) => (VXORPSMasked256 x y (VPMOVVec32x8ToM mask)) @@ -976,6 +1156,22 @@ (PopCountUint8x16 ...) => (VPOPCNTB128 ...) (PopCountUint8x32 ...) => (VPOPCNTB256 ...) (PopCountUint8x64 ...) => (VPOPCNTB512 ...) +(RoundFloat32x4 x) => (VROUNDPS128 [0] x) +(RoundFloat32x8 x) => (VROUNDPS256 [0] x) +(RoundFloat64x2 x) => (VROUNDPD128 [0] x) +(RoundFloat64x4 x) => (VROUNDPD256 [0] x) +(RoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+8] x) +(RoundSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+8] x) +(RoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+8] x) +(RoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+8] x) +(RoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+8] x) +(RoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+8] x) +(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x) +(RoundWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x) +(RoundWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x) +(RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x) +(RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x) +(RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x) (SaturatedAddInt16x16 ...) => (VPADDSW256 ...) (SaturatedAddInt16x32 ...) => (VPADDSW512 ...) (SaturatedAddInt16x8 ...) => (VPADDSW128 ...) @@ -1046,6 +1242,22 @@ (SubUint8x16 ...) => (VPSUBB128 ...) (SubUint8x32 ...) => (VPSUBB256 ...) (SubUint8x64 ...) => (VPSUBB512 ...) +(TruncFloat32x4 x) => (VROUNDPS128 [3] x) +(TruncFloat32x8 x) => (VROUNDPS256 [3] x) +(TruncFloat64x2 x) => (VROUNDPD128 [3] x) +(TruncFloat64x4 x) => (VROUNDPD256 [3] x) +(TruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+11] x) +(TruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+11] x) +(TruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+11] x) +(TruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+11] x) +(TruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+11] x) +(TruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+11] x) +(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x) +(TruncWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x) +(TruncWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x) +(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x) +(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x) +(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x) (XorFloat32x16 ...) => (VXORPS512 ...) (XorFloat32x4 ...) => (VXORPS128 ...) (XorFloat32x8 ...) => (VXORPS256 ...) 
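For reference, the unmasked 128/256-bit rules above bottom out in SSE3/SSE4.1 semantics: VADDSUBP[SD] subtracts in even-indexed lanes and adds in odd-indexed lanes, and VROUNDP[SD] takes imm8 0/1/2/3 for round-to-nearest/floor/ceil/trunc, which is why Round/Floor/Ceil/Trunc lower to those constants. A scalar Go model of what AddSubFloat32x4 computes; this is illustrative only, the real entry points are the generated stubs in src/simd/stubs_amd64.go:

	package main

	import "fmt"

	// addSubFloat32x4 models VADDSUBPS on a 4-lane vector:
	// even-indexed lanes compute x-y, odd-indexed lanes compute x+y.
	func addSubFloat32x4(x, y [4]float32) [4]float32 {
		var r [4]float32
		for i := range r {
			if i%2 == 0 {
				r[i] = x[i] - y[i]
			} else {
				r[i] = x[i] + y[i]
			}
		}
		return r
	}

	func main() {
		x := [4]float32{1, 2, 3, 4}
		y := [4]float32{10, 10, 10, 10}
		fmt.Println(addSubFloat32x4(x, y)) // [-9 12 -7 14]
	}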
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index b9709ca819..6881757d1a 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -30,6 +30,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 {name: "VSQRTPS512", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VXORPS512", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VADDPS128", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VADDSUBPS128", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VANDPS128", argLength: 2, reg: fp21, asm: "VANDPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VANDNPS128", argLength: 2, reg: fp21, asm: "VANDNPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VRCP14PS128", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -58,6 +59,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 {name: "VSQRTPS128", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VXORPS128", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDPS256", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VADDSUBPS256", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VANDPS256", argLength: 2, reg: fp21, asm: "VANDPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VANDNPS256", argLength: 2, reg: fp21, asm: "VANDNPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PS256", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -86,6 +88,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 {name: "VSQRTPS256", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VXORPS256", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VADDPD128", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VADDSUBPD128", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VANDPD128", argLength: 2, reg: fp21, asm: "VANDPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VANDNPD128", argLength: 2, reg: fp21, asm: "VANDNPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VRCP14PD128", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -114,6 +117,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 {name: "VSQRTPD128", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VXORPD128", argLength: 2, reg: fp21, asm: "VXORPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDPD256", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VADDSUBPD256", argLength: 2, reg: fp21, 
asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VANDPD256", argLength: 2, reg: fp21, asm: "VANDPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VANDNPD256", argLength: 2, reg: fp21, asm: "VANDNPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PD256", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -543,17 +547,45 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1 {name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VRNDSCALEPSMasked512", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPSMasked512", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VCMPPSMasked512", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VROUNDPS128", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VRNDSCALEPS128", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VREDUCEPS128", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VCMPPS128", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VRNDSCALEPSMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VREDUCEPSMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VCMPPSMasked128", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VROUNDPS256", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPSMasked256", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", 
resultInArg0: false}, {name: "VCMPPSMasked256", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VRNDSCALEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VREDUCEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VCMPPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VROUNDPD256", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VRNDSCALEPD256", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPD256", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VCMPPD256", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VRNDSCALEPDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VCMPPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VRNDSCALEPD512", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPD512", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VCMPPD512", argLength: 2, reg: fp2k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VRNDSCALEPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VCMPPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPW256", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 529ec09de9..25a496c52f 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -46,12 +46,15 @@ func simdGenericOps() []opData { 
{name: "SubFloat32x16", argLength: 2, commutative: false}, {name: "XorFloat32x16", argLength: 2, commutative: true}, {name: "AddFloat32x4", argLength: 2, commutative: true}, + {name: "AddSubFloat32x4", argLength: 2, commutative: false}, {name: "AndFloat32x4", argLength: 2, commutative: true}, {name: "AndNotFloat32x4", argLength: 2, commutative: true}, {name: "ApproximateReciprocalFloat32x4", argLength: 1, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false}, + {name: "CeilFloat32x4", argLength: 1, commutative: false}, {name: "DivFloat32x4", argLength: 2, commutative: false}, {name: "EqualFloat32x4", argLength: 2, commutative: true}, + {name: "FloorFloat32x4", argLength: 1, commutative: false}, {name: "GreaterFloat32x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x4", argLength: 2, commutative: false}, {name: "IsNanFloat32x4", argLength: 2, commutative: true}, @@ -86,16 +89,21 @@ func simdGenericOps() []opData { {name: "OrFloat32x4", argLength: 2, commutative: true}, {name: "PairwiseAddFloat32x4", argLength: 2, commutative: false}, {name: "PairwiseSubFloat32x4", argLength: 2, commutative: false}, + {name: "RoundFloat32x4", argLength: 1, commutative: false}, {name: "SqrtFloat32x4", argLength: 1, commutative: false}, {name: "SubFloat32x4", argLength: 2, commutative: false}, + {name: "TruncFloat32x4", argLength: 1, commutative: false}, {name: "XorFloat32x4", argLength: 2, commutative: true}, {name: "AddFloat32x8", argLength: 2, commutative: true}, + {name: "AddSubFloat32x8", argLength: 2, commutative: false}, {name: "AndFloat32x8", argLength: 2, commutative: true}, {name: "AndNotFloat32x8", argLength: 2, commutative: true}, {name: "ApproximateReciprocalFloat32x8", argLength: 1, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false}, + {name: "CeilFloat32x8", argLength: 1, commutative: false}, {name: "DivFloat32x8", argLength: 2, commutative: false}, {name: "EqualFloat32x8", argLength: 2, commutative: true}, + {name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "GreaterFloat32x8", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x8", argLength: 2, commutative: false}, {name: "IsNanFloat32x8", argLength: 2, commutative: true}, @@ -130,16 +138,21 @@ func simdGenericOps() []opData { {name: "OrFloat32x8", argLength: 2, commutative: true}, {name: "PairwiseAddFloat32x8", argLength: 2, commutative: false}, {name: "PairwiseSubFloat32x8", argLength: 2, commutative: false}, + {name: "RoundFloat32x8", argLength: 1, commutative: false}, {name: "SqrtFloat32x8", argLength: 1, commutative: false}, {name: "SubFloat32x8", argLength: 2, commutative: false}, + {name: "TruncFloat32x8", argLength: 1, commutative: false}, {name: "XorFloat32x8", argLength: 2, commutative: true}, {name: "AddFloat64x2", argLength: 2, commutative: true}, + {name: "AddSubFloat64x2", argLength: 2, commutative: false}, {name: "AndFloat64x2", argLength: 2, commutative: true}, {name: "AndNotFloat64x2", argLength: 2, commutative: true}, {name: "ApproximateReciprocalFloat64x2", argLength: 1, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false}, + {name: "CeilFloat64x2", argLength: 1, commutative: false}, {name: "DivFloat64x2", argLength: 2, commutative: false}, {name: "EqualFloat64x2", argLength: 2, commutative: true}, + {name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "GreaterFloat64x2", argLength: 2, commutative: false}, {name: 
"GreaterEqualFloat64x2", argLength: 2, commutative: false}, {name: "IsNanFloat64x2", argLength: 2, commutative: true}, @@ -174,16 +187,21 @@ func simdGenericOps() []opData { {name: "OrFloat64x2", argLength: 2, commutative: true}, {name: "PairwiseAddFloat64x2", argLength: 2, commutative: false}, {name: "PairwiseSubFloat64x2", argLength: 2, commutative: false}, + {name: "RoundFloat64x2", argLength: 1, commutative: false}, {name: "SqrtFloat64x2", argLength: 1, commutative: false}, {name: "SubFloat64x2", argLength: 2, commutative: false}, + {name: "TruncFloat64x2", argLength: 1, commutative: false}, {name: "XorFloat64x2", argLength: 2, commutative: true}, {name: "AddFloat64x4", argLength: 2, commutative: true}, + {name: "AddSubFloat64x4", argLength: 2, commutative: false}, {name: "AndFloat64x4", argLength: 2, commutative: true}, {name: "AndNotFloat64x4", argLength: 2, commutative: true}, {name: "ApproximateReciprocalFloat64x4", argLength: 1, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false}, + {name: "CeilFloat64x4", argLength: 1, commutative: false}, {name: "DivFloat64x4", argLength: 2, commutative: false}, {name: "EqualFloat64x4", argLength: 2, commutative: true}, + {name: "FloorFloat64x4", argLength: 1, commutative: false}, {name: "GreaterFloat64x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false}, {name: "IsNanFloat64x4", argLength: 2, commutative: true}, @@ -218,8 +236,10 @@ func simdGenericOps() []opData { {name: "OrFloat64x4", argLength: 2, commutative: true}, {name: "PairwiseAddFloat64x4", argLength: 2, commutative: false}, {name: "PairwiseSubFloat64x4", argLength: 2, commutative: false}, + {name: "RoundFloat64x4", argLength: 1, commutative: false}, {name: "SqrtFloat64x4", argLength: 1, commutative: false}, {name: "SubFloat64x4", argLength: 2, commutative: false}, + {name: "TruncFloat64x4", argLength: 1, commutative: false}, {name: "XorFloat64x4", argLength: 2, commutative: true}, {name: "AddFloat64x8", argLength: 2, commutative: true}, {name: "AndFloat64x8", argLength: 2, commutative: true}, @@ -1075,5 +1095,197 @@ func simdGenericOps() []opData { {name: "SaturatedAddUint8x64", argLength: 2, commutative: true}, {name: "SaturatedSubUint8x64", argLength: 2, commutative: false}, {name: "SubUint8x64", argLength: 2, commutative: false}, + {name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: 
"FloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"MaskedCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"MaskedDiffWithFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"MaskedDiffWithRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithFloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithRoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "DiffWithTruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "FloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"MaskedDiffWithTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "RoundSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index c7abca814e..090cf69032 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1223,6 +1223,7 @@ const ( OpAMD64VSQRTPS512 OpAMD64VXORPS512 OpAMD64VADDPS128 + OpAMD64VADDSUBPS128 OpAMD64VANDPS128 OpAMD64VANDNPS128 OpAMD64VRCP14PS128 @@ -1251,6 +1252,7 @@ const ( OpAMD64VSQRTPS128 OpAMD64VXORPS128 OpAMD64VADDPS256 + OpAMD64VADDSUBPS256 OpAMD64VANDPS256 OpAMD64VANDNPS256 OpAMD64VRCP14PS256 @@ -1279,6 +1281,7 @@ const ( OpAMD64VSQRTPS256 OpAMD64VXORPS256 OpAMD64VADDPD128 + OpAMD64VADDSUBPD128 OpAMD64VANDPD128 OpAMD64VANDNPD128 OpAMD64VRCP14PD128 @@ -1307,6 +1310,7 @@ const ( OpAMD64VSQRTPD128 OpAMD64VXORPD128 OpAMD64VADDPD256 + OpAMD64VADDSUBPD256 OpAMD64VANDPD256 OpAMD64VANDNPD256 OpAMD64VRCP14PD256 @@ -1736,17 +1740,45 @@ const ( OpAMD64VPMINUBMasked512 OpAMD64VPMAXUB512 OpAMD64VPMINUB512 + OpAMD64VRNDSCALEPS512 + OpAMD64VREDUCEPS512 OpAMD64VCMPPS512 + OpAMD64VRNDSCALEPSMasked512 + OpAMD64VREDUCEPSMasked512 OpAMD64VCMPPSMasked512 + OpAMD64VROUNDPS128 + OpAMD64VRNDSCALEPS128 + OpAMD64VREDUCEPS128 OpAMD64VCMPPS128 + OpAMD64VRNDSCALEPSMasked128 + OpAMD64VREDUCEPSMasked128 OpAMD64VCMPPSMasked128 + OpAMD64VROUNDPS256 + OpAMD64VRNDSCALEPS256 + OpAMD64VREDUCEPS256 OpAMD64VCMPPS256 + OpAMD64VRNDSCALEPSMasked256 + OpAMD64VREDUCEPSMasked256 OpAMD64VCMPPSMasked256 + OpAMD64VROUNDPD128 + OpAMD64VRNDSCALEPD128 + OpAMD64VREDUCEPD128 OpAMD64VCMPPD128 + OpAMD64VRNDSCALEPDMasked128 + OpAMD64VREDUCEPDMasked128 OpAMD64VCMPPDMasked128 + OpAMD64VROUNDPD256 + OpAMD64VRNDSCALEPD256 + OpAMD64VREDUCEPD256 OpAMD64VCMPPD256 + OpAMD64VRNDSCALEPDMasked256 + OpAMD64VREDUCEPDMasked256 OpAMD64VCMPPDMasked256 + OpAMD64VRNDSCALEPD512 + OpAMD64VREDUCEPD512 OpAMD64VCMPPD512 + OpAMD64VRNDSCALEPDMasked512 + OpAMD64VREDUCEPDMasked512 OpAMD64VCMPPDMasked512 OpAMD64VPCMPW256 OpAMD64VPCMPWMasked256 @@ -4065,12 +4097,15 @@ const ( OpSubFloat32x16 OpXorFloat32x16 OpAddFloat32x4 + OpAddSubFloat32x4 OpAndFloat32x4 OpAndNotFloat32x4 OpApproximateReciprocalFloat32x4 OpApproximateReciprocalOfSqrtFloat32x4 + OpCeilFloat32x4 OpDivFloat32x4 OpEqualFloat32x4 + OpFloorFloat32x4 OpGreaterFloat32x4 OpGreaterEqualFloat32x4 OpIsNanFloat32x4 @@ -4105,16 +4140,21 @@ const ( OpOrFloat32x4 OpPairwiseAddFloat32x4 OpPairwiseSubFloat32x4 + OpRoundFloat32x4 OpSqrtFloat32x4 OpSubFloat32x4 + OpTruncFloat32x4 OpXorFloat32x4 OpAddFloat32x8 + 
OpAddSubFloat32x8 OpAndFloat32x8 OpAndNotFloat32x8 OpApproximateReciprocalFloat32x8 OpApproximateReciprocalOfSqrtFloat32x8 + OpCeilFloat32x8 OpDivFloat32x8 OpEqualFloat32x8 + OpFloorFloat32x8 OpGreaterFloat32x8 OpGreaterEqualFloat32x8 OpIsNanFloat32x8 @@ -4149,16 +4189,21 @@ const ( OpOrFloat32x8 OpPairwiseAddFloat32x8 OpPairwiseSubFloat32x8 + OpRoundFloat32x8 OpSqrtFloat32x8 OpSubFloat32x8 + OpTruncFloat32x8 OpXorFloat32x8 OpAddFloat64x2 + OpAddSubFloat64x2 OpAndFloat64x2 OpAndNotFloat64x2 OpApproximateReciprocalFloat64x2 OpApproximateReciprocalOfSqrtFloat64x2 + OpCeilFloat64x2 OpDivFloat64x2 OpEqualFloat64x2 + OpFloorFloat64x2 OpGreaterFloat64x2 OpGreaterEqualFloat64x2 OpIsNanFloat64x2 @@ -4193,16 +4238,21 @@ const ( OpOrFloat64x2 OpPairwiseAddFloat64x2 OpPairwiseSubFloat64x2 + OpRoundFloat64x2 OpSqrtFloat64x2 OpSubFloat64x2 + OpTruncFloat64x2 OpXorFloat64x2 OpAddFloat64x4 + OpAddSubFloat64x4 OpAndFloat64x4 OpAndNotFloat64x4 OpApproximateReciprocalFloat64x4 OpApproximateReciprocalOfSqrtFloat64x4 + OpCeilFloat64x4 OpDivFloat64x4 OpEqualFloat64x4 + OpFloorFloat64x4 OpGreaterFloat64x4 OpGreaterEqualFloat64x4 OpIsNanFloat64x4 @@ -4237,8 +4287,10 @@ const ( OpOrFloat64x4 OpPairwiseAddFloat64x4 OpPairwiseSubFloat64x4 + OpRoundFloat64x4 OpSqrtFloat64x4 OpSubFloat64x4 + OpTruncFloat64x4 OpXorFloat64x4 OpAddFloat64x8 OpAndFloat64x8 @@ -5094,6 +5146,198 @@ const ( OpSaturatedAddUint8x64 OpSaturatedSubUint8x64 OpSubUint8x64 + OpCeilSuppressExceptionWithPrecisionFloat32x16 + OpCeilWithPrecisionFloat32x16 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 + OpDiffWithCeilWithPrecisionFloat32x16 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 + OpDiffWithFloorWithPrecisionFloat32x16 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 + OpDiffWithRoundWithPrecisionFloat32x16 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 + OpDiffWithTruncWithPrecisionFloat32x16 + OpFloorSuppressExceptionWithPrecisionFloat32x16 + OpFloorWithPrecisionFloat32x16 + OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16 + OpMaskedCeilWithPrecisionFloat32x16 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 + OpMaskedDiffWithCeilWithPrecisionFloat32x16 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 + OpMaskedDiffWithFloorWithPrecisionFloat32x16 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 + OpMaskedDiffWithRoundWithPrecisionFloat32x16 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 + OpMaskedDiffWithTruncWithPrecisionFloat32x16 + OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16 + OpMaskedFloorWithPrecisionFloat32x16 + OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16 + OpMaskedRoundWithPrecisionFloat32x16 + OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16 + OpMaskedTruncWithPrecisionFloat32x16 + OpRoundSuppressExceptionWithPrecisionFloat32x16 + OpRoundWithPrecisionFloat32x16 + OpTruncSuppressExceptionWithPrecisionFloat32x16 + OpTruncWithPrecisionFloat32x16 + OpCeilSuppressExceptionWithPrecisionFloat32x4 + OpCeilWithPrecisionFloat32x4 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 + OpDiffWithCeilWithPrecisionFloat32x4 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 + OpDiffWithFloorWithPrecisionFloat32x4 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 + OpDiffWithRoundWithPrecisionFloat32x4 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 + OpDiffWithTruncWithPrecisionFloat32x4 + OpFloorSuppressExceptionWithPrecisionFloat32x4 + OpFloorWithPrecisionFloat32x4 + 
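The AddSub ops are the one new lane pattern in this batch rather than a rounding variant: VADDSUBPS and VADDSUBPD subtract in the even-indexed lanes and add in the odd-indexed lanes. A scalar reference model, purely illustrative:

```go
// addSubFloat32 mirrors VADDSUBPS lane by lane: even-indexed lanes
// compute a[i] - b[i], odd-indexed lanes compute a[i] + b[i].
func addSubFloat32(a, b []float32) []float32 {
	dst := make([]float32, len(a))
	for i := range a {
		if i%2 == 0 {
			dst[i] = a[i] - b[i] // even lane: subtract
		} else {
			dst[i] = a[i] + b[i] // odd lane: add
		}
	}
	return dst
}
```

This interleaved pattern is what makes complex multiplication cheap on SSE3-era hardware, which is presumably why the instruction exists only at 128 and 256 bits, with no masked or 512-bit form in the tables below.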
OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4 + OpMaskedCeilWithPrecisionFloat32x4 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 + OpMaskedDiffWithCeilWithPrecisionFloat32x4 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 + OpMaskedDiffWithFloorWithPrecisionFloat32x4 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 + OpMaskedDiffWithRoundWithPrecisionFloat32x4 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 + OpMaskedDiffWithTruncWithPrecisionFloat32x4 + OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4 + OpMaskedFloorWithPrecisionFloat32x4 + OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4 + OpMaskedRoundWithPrecisionFloat32x4 + OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4 + OpMaskedTruncWithPrecisionFloat32x4 + OpRoundSuppressExceptionWithPrecisionFloat32x4 + OpRoundWithPrecisionFloat32x4 + OpTruncSuppressExceptionWithPrecisionFloat32x4 + OpTruncWithPrecisionFloat32x4 + OpCeilSuppressExceptionWithPrecisionFloat32x8 + OpCeilWithPrecisionFloat32x8 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 + OpDiffWithCeilWithPrecisionFloat32x8 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 + OpDiffWithFloorWithPrecisionFloat32x8 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 + OpDiffWithRoundWithPrecisionFloat32x8 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 + OpDiffWithTruncWithPrecisionFloat32x8 + OpFloorSuppressExceptionWithPrecisionFloat32x8 + OpFloorWithPrecisionFloat32x8 + OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8 + OpMaskedCeilWithPrecisionFloat32x8 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 + OpMaskedDiffWithCeilWithPrecisionFloat32x8 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 + OpMaskedDiffWithFloorWithPrecisionFloat32x8 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 + OpMaskedDiffWithRoundWithPrecisionFloat32x8 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 + OpMaskedDiffWithTruncWithPrecisionFloat32x8 + OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8 + OpMaskedFloorWithPrecisionFloat32x8 + OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8 + OpMaskedRoundWithPrecisionFloat32x8 + OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8 + OpMaskedTruncWithPrecisionFloat32x8 + OpRoundSuppressExceptionWithPrecisionFloat32x8 + OpRoundWithPrecisionFloat32x8 + OpTruncSuppressExceptionWithPrecisionFloat32x8 + OpTruncWithPrecisionFloat32x8 + OpCeilSuppressExceptionWithPrecisionFloat64x2 + OpCeilWithPrecisionFloat64x2 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 + OpDiffWithCeilWithPrecisionFloat64x2 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 + OpDiffWithFloorWithPrecisionFloat64x2 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 + OpDiffWithRoundWithPrecisionFloat64x2 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 + OpDiffWithTruncWithPrecisionFloat64x2 + OpFloorSuppressExceptionWithPrecisionFloat64x2 + OpFloorWithPrecisionFloat64x2 + OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2 + OpMaskedCeilWithPrecisionFloat64x2 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 + OpMaskedDiffWithCeilWithPrecisionFloat64x2 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 + OpMaskedDiffWithFloorWithPrecisionFloat64x2 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 + OpMaskedDiffWithRoundWithPrecisionFloat64x2 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 + OpMaskedDiffWithTruncWithPrecisionFloat64x2 + 
OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2 + OpMaskedFloorWithPrecisionFloat64x2 + OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2 + OpMaskedRoundWithPrecisionFloat64x2 + OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2 + OpMaskedTruncWithPrecisionFloat64x2 + OpRoundSuppressExceptionWithPrecisionFloat64x2 + OpRoundWithPrecisionFloat64x2 + OpTruncSuppressExceptionWithPrecisionFloat64x2 + OpTruncWithPrecisionFloat64x2 + OpCeilSuppressExceptionWithPrecisionFloat64x4 + OpCeilWithPrecisionFloat64x4 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 + OpDiffWithCeilWithPrecisionFloat64x4 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 + OpDiffWithFloorWithPrecisionFloat64x4 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 + OpDiffWithRoundWithPrecisionFloat64x4 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 + OpDiffWithTruncWithPrecisionFloat64x4 + OpFloorSuppressExceptionWithPrecisionFloat64x4 + OpFloorWithPrecisionFloat64x4 + OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4 + OpMaskedCeilWithPrecisionFloat64x4 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 + OpMaskedDiffWithCeilWithPrecisionFloat64x4 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 + OpMaskedDiffWithFloorWithPrecisionFloat64x4 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 + OpMaskedDiffWithRoundWithPrecisionFloat64x4 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 + OpMaskedDiffWithTruncWithPrecisionFloat64x4 + OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4 + OpMaskedFloorWithPrecisionFloat64x4 + OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4 + OpMaskedRoundWithPrecisionFloat64x4 + OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4 + OpMaskedTruncWithPrecisionFloat64x4 + OpRoundSuppressExceptionWithPrecisionFloat64x4 + OpRoundWithPrecisionFloat64x4 + OpTruncSuppressExceptionWithPrecisionFloat64x4 + OpTruncWithPrecisionFloat64x4 + OpCeilSuppressExceptionWithPrecisionFloat64x8 + OpCeilWithPrecisionFloat64x8 + OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 + OpDiffWithCeilWithPrecisionFloat64x8 + OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 + OpDiffWithFloorWithPrecisionFloat64x8 + OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 + OpDiffWithRoundWithPrecisionFloat64x8 + OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 + OpDiffWithTruncWithPrecisionFloat64x8 + OpFloorSuppressExceptionWithPrecisionFloat64x8 + OpFloorWithPrecisionFloat64x8 + OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8 + OpMaskedCeilWithPrecisionFloat64x8 + OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 + OpMaskedDiffWithCeilWithPrecisionFloat64x8 + OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 + OpMaskedDiffWithFloorWithPrecisionFloat64x8 + OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 + OpMaskedDiffWithRoundWithPrecisionFloat64x8 + OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 + OpMaskedDiffWithTruncWithPrecisionFloat64x8 + OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8 + OpMaskedFloorWithPrecisionFloat64x8 + OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8 + OpMaskedRoundWithPrecisionFloat64x8 + OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8 + OpMaskedTruncWithPrecisionFloat64x8 + OpRoundSuppressExceptionWithPrecisionFloat64x8 + OpRoundWithPrecisionFloat64x8 + OpTruncSuppressExceptionWithPrecisionFloat64x8 + OpTruncWithPrecisionFloat64x8 ) var opcodeTable = [...]opInfo{ @@ -18091,6 +18335,20 @@ var opcodeTable = [...]opInfo{ }, 
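The regInfo masks in the opcodeTable entries below are bit sets over the backend's register numbering: 2147418112 is 0x7fff0000, selecting X0 through X14 (X15 is left out because Go's register ABI reserves it as the fixed zero register), and 1090921693184 is 0xfe00000000, selecting K1 through K7 (K0 is left out because, as a writemask operand, K0 encodes "no masking"). A small decoder, assuming the usual amd64 ordering of 16 general-purpose registers, then X0..X15, then K0..K7:

```go
package main

import "fmt"

// decode lists the register names selected by a regInfo bit mask;
// bit i picks the i-th register in the backend's numbering.
func decode(mask uint64, names []string) []string {
	var regs []string
	for i, name := range names {
		if mask&(1<<uint(i)) != 0 {
			regs = append(regs, name)
		}
	}
	return regs
}

func main() {
	// Assumed numbering: 16 GPs, then X0..X15, then K0..K7.
	names := make([]string, 0, 40)
	for i := 0; i < 16; i++ {
		names = append(names, fmt.Sprintf("GP%d", i))
	}
	for i := 0; i < 16; i++ {
		names = append(names, fmt.Sprintf("X%d", i))
	}
	for i := 0; i < 8; i++ {
		names = append(names, fmt.Sprintf("K%d", i))
	}
	fmt.Println(decode(2147418112, names))    // [X0 X1 ... X14]
	fmt.Println(decode(1090921693184, names)) // [K1 K2 ... K7]
}
```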
}, }, + { + name: "VADDSUBPS128", + argLen: 2, + asm: x86.AVADDSUBPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VANDPS128", argLen: 2, @@ -18506,6 +18764,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VADDSUBPS256", + argLen: 2, + asm: x86.AVADDSUBPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VANDPS256", argLen: 2, @@ -18921,6 +19193,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VADDSUBPD128", + argLen: 2, + asm: x86.AVADDSUBPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VANDPD128", argLen: 2, @@ -19336,6 +19622,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VADDSUBPD256", + argLen: 2, + asm: x86.AVADDSUBPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VANDPD256", argLen: 2, @@ -25772,6 +26072,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPS512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPS512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPS512", auxType: auxInt8, @@ -25788,6 +26116,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPSMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPSMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPSMasked512", auxType: auxInt8, @@ -25805,6 +26163,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VROUNDPS128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVROUNDPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VRNDSCALEPS128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPS128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPS128", auxType: auxInt8, @@ -25821,6 +26221,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPSMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPSMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPSMasked128", auxType: auxInt8, @@ -25838,6 +26268,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VROUNDPS256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVROUNDPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VRNDSCALEPS256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPS256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPS256", auxType: auxInt8, @@ -25854,6 +26326,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPSMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPSMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: 
"VCMPPSMasked256", auxType: auxInt8, @@ -25871,6 +26373,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VROUNDPD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVROUNDPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VRNDSCALEPD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPD128", auxType: auxInt8, @@ -25887,6 +26431,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPDMasked128", auxType: auxInt8, @@ -25904,6 +26478,48 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VROUNDPD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVROUNDPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VRNDSCALEPD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPD256", auxType: auxInt8, @@ -25920,6 +26536,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPDMasked256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 
K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPDMasked256", auxType: auxInt8, @@ -25937,6 +26583,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPD512", auxType: auxInt8, @@ -25953,6 +26627,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VRNDSCALEPDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVRNDSCALEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VREDUCEPDMasked512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VCMPPDMasked512", auxType: auxInt8, @@ -54128,6 +54832,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddSubFloat32x4", + argLen: 2, + generic: true, + }, { name: "AndFloat32x4", argLen: 2, @@ -54150,6 +54859,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CeilFloat32x4", + argLen: 1, + generic: true, + }, { name: "DivFloat32x4", argLen: 2, @@ -54161,6 +54875,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "FloorFloat32x4", + argLen: 1, + generic: true, + }, { name: "GreaterFloat32x4", argLen: 2, @@ -54348,6 +55067,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "RoundFloat32x4", + argLen: 1, + generic: true, + }, { name: "SqrtFloat32x4", argLen: 1, @@ -54358,6 +55082,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "TruncFloat32x4", + argLen: 1, + generic: true, + }, { name: "XorFloat32x4", argLen: 2, @@ -54370,6 +55099,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddSubFloat32x8", + argLen: 2, + generic: true, + }, { name: "AndFloat32x8", argLen: 2, @@ -54392,6 +55126,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CeilFloat32x8", + argLen: 1, + generic: true, + }, { name: "DivFloat32x8", argLen: 2, @@ -54403,6 +55142,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "FloorFloat32x8", + argLen: 1, + generic: true, + }, { name: "GreaterFloat32x8", argLen: 2, @@ -54590,6 +55334,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "RoundFloat32x8", + argLen: 1, + generic: true, + }, { name: "SqrtFloat32x8", argLen: 1, @@ -54600,6 +55349,11 
@@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "TruncFloat32x8", + argLen: 1, + generic: true, + }, { name: "XorFloat32x8", argLen: 2, @@ -54612,6 +55366,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddSubFloat64x2", + argLen: 2, + generic: true, + }, { name: "AndFloat64x2", argLen: 2, @@ -54634,6 +55393,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CeilFloat64x2", + argLen: 1, + generic: true, + }, { name: "DivFloat64x2", argLen: 2, @@ -54645,6 +55409,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "FloorFloat64x2", + argLen: 1, + generic: true, + }, { name: "GreaterFloat64x2", argLen: 2, @@ -54832,6 +55601,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "RoundFloat64x2", + argLen: 1, + generic: true, + }, { name: "SqrtFloat64x2", argLen: 1, @@ -54842,6 +55616,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "TruncFloat64x2", + argLen: 1, + generic: true, + }, { name: "XorFloat64x2", argLen: 2, @@ -54854,6 +55633,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddSubFloat64x4", + argLen: 2, + generic: true, + }, { name: "AndFloat64x4", argLen: 2, @@ -54876,6 +55660,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "CeilFloat64x4", + argLen: 1, + generic: true, + }, { name: "DivFloat64x4", argLen: 2, @@ -54887,6 +55676,11 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "FloorFloat64x4", + argLen: 1, + generic: true, + }, { name: "GreaterFloat64x4", argLen: 2, @@ -55074,6 +55868,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "RoundFloat64x4", + argLen: 1, + generic: true, + }, { name: "SqrtFloat64x4", argLen: 1, @@ -55084,6 +55883,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "TruncFloat64x4", + argLen: 1, + generic: true, + }, { name: "XorFloat64x4", argLen: 2, @@ -59832,6 +60636,1158 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "CeilSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"MaskedCeilSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat32x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"FloorWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"FloorSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat32x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"DiffWithTruncWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"DiffWithTruncSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat64x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "CeilWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithCeilWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithFloorWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: 
"DiffWithRoundWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "DiffWithTruncWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "FloorWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedCeilWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithCeilWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithFloorWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithRoundWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedDiffWithTruncWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedFloorWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedRoundWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "MaskedTruncWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, + { + name: "RoundSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "RoundWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncSuppressExceptionWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, + { + name: "TruncWithPrecisionFloat64x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, } func (o Op) Asm() obj.As { return opcodeTable[o].asm } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 86fbc988cf..a6cf0a0b7b 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -664,6 +664,18 @@ func rewriteValueAMD64(v *Value) bool { case OpAddPtr: v.Op = OpAMD64ADDQ return true + case OpAddSubFloat32x4: + v.Op = OpAMD64VADDSUBPS128 + return true + case OpAddSubFloat32x8: + v.Op = OpAMD64VADDSUBPS256 + return true + case OpAddSubFloat64x2: + v.Op = OpAMD64VADDSUBPD128 + return true + case OpAddSubFloat64x4: + v.Op = OpAMD64VADDSUBPD256 + return true case OpAddUint16x16: v.Op = OpAMD64VPADDW256 
return true @@ -994,6 +1006,38 @@ func rewriteValueAMD64(v *Value) bool { return true case OpCeil: return rewriteValueAMD64_OpCeil(v) + case OpCeilFloat32x4: + return rewriteValueAMD64_OpCeilFloat32x4(v) + case OpCeilFloat32x8: + return rewriteValueAMD64_OpCeilFloat32x8(v) + case OpCeilFloat64x2: + return rewriteValueAMD64_OpCeilFloat64x2(v) + case OpCeilFloat64x4: + return rewriteValueAMD64_OpCeilFloat64x4(v) + case OpCeilSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x16(v) + case OpCeilSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x4(v) + case OpCeilSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x8(v) + case OpCeilSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x2(v) + case OpCeilSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x4(v) + case OpCeilSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x8(v) + case OpCeilWithPrecisionFloat32x16: + return rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v) + case OpCeilWithPrecisionFloat32x4: + return rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v) + case OpCeilWithPrecisionFloat32x8: + return rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v) + case OpCeilWithPrecisionFloat64x2: + return rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v) + case OpCeilWithPrecisionFloat64x4: + return rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v) + case OpCeilWithPrecisionFloat64x8: + return rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v) case OpClosureCall: v.Op = OpAMD64CALLclosure return true @@ -1080,6 +1124,102 @@ func rewriteValueAMD64(v *Value) bool { case OpCvtBoolToUint8: v.Op = OpCopy return true + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v) + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v) + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v) + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v) + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v) + case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v) + case OpDiffWithCeilWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v) + case OpDiffWithCeilWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v) + case OpDiffWithCeilWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v) + case OpDiffWithCeilWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v) + case OpDiffWithCeilWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v) + case OpDiffWithCeilWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v) + case 
OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v) + case OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v) + case OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v) + case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v) + case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v) + case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v) + case OpDiffWithFloorWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v) + case OpDiffWithFloorWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v) + case OpDiffWithFloorWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v) + case OpDiffWithFloorWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v) + case OpDiffWithFloorWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v) + case OpDiffWithFloorWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v) + case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v) + case OpDiffWithRoundWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v) + case OpDiffWithRoundWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v) + case OpDiffWithRoundWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v) + case OpDiffWithRoundWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v) + case OpDiffWithRoundWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v) + case OpDiffWithRoundWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8: + return 
rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v) + case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v) + case OpDiffWithTruncWithPrecisionFloat32x16: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v) + case OpDiffWithTruncWithPrecisionFloat32x4: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v) + case OpDiffWithTruncWithPrecisionFloat32x8: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v) + case OpDiffWithTruncWithPrecisionFloat64x2: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v) + case OpDiffWithTruncWithPrecisionFloat64x4: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v) + case OpDiffWithTruncWithPrecisionFloat64x8: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v) case OpDiv128u: v.Op = OpAMD64DIVQU2 return true @@ -1211,6 +1351,38 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFMA(v) case OpFloor: return rewriteValueAMD64_OpFloor(v) + case OpFloorFloat32x4: + return rewriteValueAMD64_OpFloorFloat32x4(v) + case OpFloorFloat32x8: + return rewriteValueAMD64_OpFloorFloat32x8(v) + case OpFloorFloat64x2: + return rewriteValueAMD64_OpFloorFloat64x2(v) + case OpFloorFloat64x4: + return rewriteValueAMD64_OpFloorFloat64x4(v) + case OpFloorSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x16(v) + case OpFloorSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x4(v) + case OpFloorSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x8(v) + case OpFloorSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x2(v) + case OpFloorSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x4(v) + case OpFloorSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x8(v) + case OpFloorWithPrecisionFloat32x16: + return rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v) + case OpFloorWithPrecisionFloat32x4: + return rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v) + case OpFloorWithPrecisionFloat32x8: + return rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v) + case OpFloorWithPrecisionFloat64x2: + return rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v) + case OpFloorWithPrecisionFloat64x4: + return rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v) + case OpFloorWithPrecisionFloat64x8: + return rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v) case OpGetCallerPC: v.Op = OpAMD64LoweredGetCallerPC return true @@ -1772,6 +1944,126 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedAverageUint8x32(v) case OpMaskedAverageUint8x64: return rewriteValueAMD64_OpMaskedAverageUint8x64(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4: + return 
rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedCeilWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v) + case OpMaskedCeilWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v) + case OpMaskedCeilWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v) + case OpMaskedCeilWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v) + case OpMaskedCeilWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v) + case OpMaskedCeilWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedDiffWithCeilWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v) + case OpMaskedDiffWithCeilWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v) + case OpMaskedDiffWithCeilWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v) + case OpMaskedDiffWithCeilWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v) + case OpMaskedDiffWithCeilWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v) + case OpMaskedDiffWithCeilWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2: + return 
rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedDiffWithFloorWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v) + case OpMaskedDiffWithFloorWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v) + case OpMaskedDiffWithFloorWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v) + case OpMaskedDiffWithFloorWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v) + case OpMaskedDiffWithFloorWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v) + case OpMaskedDiffWithFloorWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedDiffWithRoundWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v) + case OpMaskedDiffWithRoundWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v) + case OpMaskedDiffWithRoundWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v) + case OpMaskedDiffWithRoundWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v) + case OpMaskedDiffWithRoundWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v) + case OpMaskedDiffWithRoundWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v) + case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v) + case 
OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedDiffWithTruncWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v) + case OpMaskedDiffWithTruncWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v) + case OpMaskedDiffWithTruncWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v) + case OpMaskedDiffWithTruncWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v) + case OpMaskedDiffWithTruncWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v) + case OpMaskedDiffWithTruncWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v) case OpMaskedDivFloat32x16: return rewriteValueAMD64_OpMaskedDivFloat32x16(v) case OpMaskedDivFloat32x4: @@ -1844,6 +2136,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedEqualUint8x32(v) case OpMaskedEqualUint8x64: return rewriteValueAMD64_OpMaskedEqualUint8x64(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedFloorWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v) + case OpMaskedFloorWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v) + case OpMaskedFloorWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v) + case OpMaskedFloorWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v) + case OpMaskedFloorWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v) + case OpMaskedFloorWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v) case OpMaskedGreaterEqualFloat32x16: return rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v) case OpMaskedGreaterEqualFloat32x4: @@ -2426,6 +2742,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedPopCountUint8x32(v) case OpMaskedPopCountUint8x64: return rewriteValueAMD64_OpMaskedPopCountUint8x64(v) + case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4(v) + case 
OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedRoundWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v) + case OpMaskedRoundWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v) + case OpMaskedRoundWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v) + case OpMaskedRoundWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v) + case OpMaskedRoundWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v) + case OpMaskedRoundWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v) case OpMaskedSaturatedAddInt16x16: return rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v) case OpMaskedSaturatedAddInt16x32: @@ -2546,6 +2886,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpMaskedSubUint8x32(v) case OpMaskedSubUint8x64: return rewriteValueAMD64_OpMaskedSubUint8x64(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4(v) + case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8(v) + case OpMaskedTruncWithPrecisionFloat32x16: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v) + case OpMaskedTruncWithPrecisionFloat32x4: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v) + case OpMaskedTruncWithPrecisionFloat32x8: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v) + case OpMaskedTruncWithPrecisionFloat64x2: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v) + case OpMaskedTruncWithPrecisionFloat64x4: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v) + case OpMaskedTruncWithPrecisionFloat64x8: + return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v) case OpMaskedXorFloat32x16: return rewriteValueAMD64_OpMaskedXorFloat32x16(v) case OpMaskedXorFloat32x4: @@ -3292,8 +3656,40 @@ func rewriteValueAMD64(v *Value) bool { case OpRound64F: v.Op = OpAMD64LoweredRound64F return true + case OpRoundFloat32x4: + return rewriteValueAMD64_OpRoundFloat32x4(v) + case OpRoundFloat32x8: + return rewriteValueAMD64_OpRoundFloat32x8(v) + case OpRoundFloat64x2: + return rewriteValueAMD64_OpRoundFloat64x2(v) + case OpRoundFloat64x4: + return 
rewriteValueAMD64_OpRoundFloat64x4(v) + case OpRoundSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x16(v) + case OpRoundSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x4(v) + case OpRoundSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x8(v) + case OpRoundSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x2(v) + case OpRoundSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x4(v) + case OpRoundSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x8(v) case OpRoundToEven: return rewriteValueAMD64_OpRoundToEven(v) + case OpRoundWithPrecisionFloat32x16: + return rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v) + case OpRoundWithPrecisionFloat32x4: + return rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v) + case OpRoundWithPrecisionFloat32x8: + return rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v) + case OpRoundWithPrecisionFloat64x2: + return rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v) + case OpRoundWithPrecisionFloat64x4: + return rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v) + case OpRoundWithPrecisionFloat64x8: + return rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v) case OpRsh16Ux16: return rewriteValueAMD64_OpRsh16Ux16(v) case OpRsh16Ux32: @@ -3653,6 +4049,38 @@ func rewriteValueAMD64(v *Value) bool { case OpTrunc64to8: v.Op = OpCopy return true + case OpTruncFloat32x4: + return rewriteValueAMD64_OpTruncFloat32x4(v) + case OpTruncFloat32x8: + return rewriteValueAMD64_OpTruncFloat32x8(v) + case OpTruncFloat64x2: + return rewriteValueAMD64_OpTruncFloat64x2(v) + case OpTruncFloat64x4: + return rewriteValueAMD64_OpTruncFloat64x4(v) + case OpTruncSuppressExceptionWithPrecisionFloat32x16: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x16(v) + case OpTruncSuppressExceptionWithPrecisionFloat32x4: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x4(v) + case OpTruncSuppressExceptionWithPrecisionFloat32x8: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x8(v) + case OpTruncSuppressExceptionWithPrecisionFloat64x2: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x2(v) + case OpTruncSuppressExceptionWithPrecisionFloat64x4: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x4(v) + case OpTruncSuppressExceptionWithPrecisionFloat64x8: + return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x8(v) + case OpTruncWithPrecisionFloat32x16: + return rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v) + case OpTruncWithPrecisionFloat32x4: + return rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v) + case OpTruncWithPrecisionFloat32x8: + return rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v) + case OpTruncWithPrecisionFloat64x2: + return rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v) + case OpTruncWithPrecisionFloat64x4: + return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v) + case OpTruncWithPrecisionFloat64x8: + return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v) case OpWB: v.Op = OpAMD64LoweredWB return true @@ -27029,6 +27457,210 @@ func rewriteValueAMD64_OpCeil(v *Value) bool { return true } } +func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool { + v_0 := v.Args[0] + 
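+	// VROUNDPS/VROUNDPD take an SSE4.1-style imm8 rounding control:
+	// bits 1:0 select the mode (0 = nearest, 1 = down, 2 = up, 3 = toward
+	// zero), bit 2 defers to MXCSR.RC, and bit 3 suppresses precision
+	// exceptions. Ceil therefore lowers to immediate 2; the Floor and
+	// Trunc rewrites later in this file use 1 and 3 the same way.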
// match: (CeilFloat32x4 x) + // result: (VROUNDPS128 [2] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilFloat32x8 x) + // result: (VROUNDPS256 [2] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilFloat64x2 x) + // result: (VROUNDPD128 [2] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilFloat64x4 x) + // result: (VROUNDPD256 [2] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat32x4 [a] x) 
+ // result: (VRNDSCALEPS128 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (CeilWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpCondSelect(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -28162,6 +28794,630 @@ func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool { } return false } +func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+10] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 10) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithCeilWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+2] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithFloorWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+8] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 8) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func 
rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithRoundWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+0] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+11] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 11) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a 
+ 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (DiffWithTruncWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+3] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpDiv16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -28843,6 +30099,210 @@ func rewriteValueAMD64_OpFloor(v *Value) bool { return true } } +func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat32x4 x) + // result: (VROUNDPS128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat32x8 x) + // result: (VROUNDPS256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat64x2 x) + // result: (VROUNDPD128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat64x4 x) + // result: (VROUNDPD256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+9] x) + for { + a := 
auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorSuppressExceptionWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+9] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 9) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+1] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpGetG(v *Value) bool { v_0 := v.Args[0] // match: (GetG mem) @@ -33790,6 +35250,1086 @@ func rewriteValueAMD64_OpMaskedAverageUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + 
} +} +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+10] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) + for { + a := 
auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedCeilWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+10] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 
:= v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+10] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+10] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+10] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+10] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 10) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + 
v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := 
v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := 
v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + 
v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} func rewriteValueAMD64_OpMaskedDivFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -34546,6 +37086,222 @@ func rewriteValueAMD64_OpMaskedEqualUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+9] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+9] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+9] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+9] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+9] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 9) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedFloorWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -40348,6 +43104,222 @@ func rewriteValueAMD64_OpMaskedPopCountUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: 
(MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+8] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+8] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+8] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 8) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedRoundWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} func rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -41416,6 +44388,222 @@ func rewriteValueAMD64_OpMaskedSubUint8x64(v *Value) bool { return true } } +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool { + 
v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+11] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+11] x (VPMOVVec32x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+11] x (VPMOVVec64x2ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+11] x (VPMOVVec64x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+11] x (VPMOVVec64x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 11) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncWithPrecisionFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (MaskedTruncWithPrecisionFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = 
int8ToAuxInt(a + 3)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (MaskedTruncWithPrecisionFloat32x8 [a] x mask)
+	// result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask))
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VRNDSCALEPSMasked256)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (MaskedTruncWithPrecisionFloat64x2 [a] x mask)
+	// result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask))
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VRNDSCALEPDMasked128)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (MaskedTruncWithPrecisionFloat64x4 [a] x mask)
+	// result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask))
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VRNDSCALEPDMasked256)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (MaskedTruncWithPrecisionFloat64x8 [a] x mask)
+	// result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask))
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		mask := v_1
+		v.reset(OpAMD64VRNDSCALEPDMasked512)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg2(x, v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpMaskedXorFloat32x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -43218,6 +46406,132 @@ func rewriteValueAMD64_OpPopCount8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundFloat32x4 x)
+	// result: (VROUNDPS128 [0] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPS128)
+		v.AuxInt = int8ToAuxInt(0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundFloat32x8 x)
+	// result: (VROUNDPS256 [0] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPS256)
+		v.AuxInt = int8ToAuxInt(0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundFloat64x2 x)
+	// result: (VROUNDPD128 [0] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPD128)
+		v.AuxInt = int8ToAuxInt(0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundFloat64x4 x)
+	// result: (VROUNDPD256 [0] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPD256)
+		v.AuxInt = int8ToAuxInt(0)
+		v.AddArg(x)
+		return true
+	}
+}
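The immediates in these rules follow the x86 rounding-control layout: bits 1:0 pick the mode (0 nearest-even for Round, 1 down for Floor, 2 up for Ceil, 3 toward zero for Trunc), bit 3 suppresses the precision exception, and VRNDSCALE additionally carries the scale in bits 7:4. That is why Round lowers to [0], Trunc to [3], and the SuppressException variants add 8. A minimal sketch of that composition, assuming the intrinsic layer pre-shifts the precision into the high nibble (the trailing 4 passed to opLen1Imm8 later in this patch suggests a 4-bit shift; that is an inference, not something this hunk states):

	package main

	import "fmt"

	const (
		modeNearestEven = 0 // Round*
		modeFloor       = 1 // Floor*
		modeCeil        = 2 // Ceil*
		modeTrunc       = 3 // Trunc*
		suppressPrec    = 8 // bit 3: the *SuppressException* variants
	)

	// rndscaleImm builds a VRNDSCALE immediate: imm[7:4] = precision
	// (round to a multiple of 2^-precision), imm[3] = suppress #P,
	// imm[1:0] = rounding mode.
	func rndscaleImm(precision, mode uint8, suppress bool) uint8 {
		imm := precision<<4 | mode&3
		if suppress {
			imm |= suppressPrec
		}
		return imm
	}

	func main() {
		// CeilSuppressExceptionWithPrecision(p) then lowers to
		// [16*p + 10], i.e. the [a+10] seen in the rules when a = p<<4.
		fmt.Printf("%#x\n", rndscaleImm(3, modeCeil, true)) // 0x3a
	}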
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat32x16 [a] x)
+	// result: (VRNDSCALEPS512 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS512)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat32x4 [a] x)
+	// result: (VRNDSCALEPS128 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS128)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat32x8 [a] x)
+	// result: (VRNDSCALEPS256 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS256)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat64x2 [a] x)
+	// result: (VRNDSCALEPD128 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD128)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat64x4 [a] x)
+	// result: (VRNDSCALEPD256 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD256)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundSuppressExceptionWithPrecisionFloat64x8 [a] x)
+	// result: (VRNDSCALEPD512 [a+8] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD512)
+		v.AuxInt = int8ToAuxInt(a + 8)
+		v.AddArg(x)
+		return true
+	}
+}
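For reference, a scalar model of what the RNDSCALE-based rules compute (a sketch under the assumption that VRNDSCALEP* rounds to a multiple of 2^-M; the VREDUCEP*-based DiffWith* rules earlier in this file instead return the remainder x - rndscale(x)):

	package main

	import (
		"fmt"
		"math"
	)

	// rndscale models VRNDSCALE lane behavior: round x to a multiple of
	// 2^-precision using the mode in the immediate's low two bits.
	func rndscale(x float64, precision uint, mode uint8) float64 {
		scale := math.Ldexp(1, int(precision)) // 2^precision
		y := x * scale
		switch mode & 3 {
		case 0:
			y = math.RoundToEven(y)
		case 1:
			y = math.Floor(y)
		case 2:
			y = math.Ceil(y)
		case 3:
			y = math.Trunc(y)
		}
		return y / scale
	}

	// reduce models VREDUCE: the part of x that rounding discards.
	func reduce(x float64, precision uint, mode uint8) float64 {
		return x - rndscale(x, precision, mode)
	}

	func main() {
		fmt.Println(rndscale(1.2345, 4, 2)) // ceil to 1/16ths: 1.25
		fmt.Println(reduce(1.2345, 4, 2))   // remainder: ≈ -0.0155
	}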
 func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (RoundToEven x)
@@ -43230,6 +46544,84 @@ func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat32x16 [a] x)
+	// result: (VRNDSCALEPS512 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS512)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat32x4 [a] x)
+	// result: (VRNDSCALEPS128 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS128)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat32x8 [a] x)
+	// result: (VRNDSCALEPS256 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS256)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat64x2 [a] x)
+	// result: (VRNDSCALEPD128 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD128)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat64x4 [a] x)
+	// result: (VRNDSCALEPD256 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD256)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (RoundWithPrecisionFloat64x8 [a] x)
+	// result: (VRNDSCALEPD512 [a+0] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD512)
+		v.AuxInt = int8ToAuxInt(a + 0)
+		v.AddArg(x)
+		return true
+	}
+}
 func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -45190,6 +48582,210 @@ func rewriteValueAMD64_OpTrunc(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncFloat32x4 x)
+	// result: (VROUNDPS128 [3] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPS128)
+		v.AuxInt = int8ToAuxInt(3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncFloat32x8 x)
+	// result: (VROUNDPS256 [3] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPS256)
+		v.AuxInt = int8ToAuxInt(3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncFloat64x2 x)
+	// result: (VROUNDPD128 [3] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPD128)
+		v.AuxInt = int8ToAuxInt(3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncFloat64x4 x)
+	// result: (VROUNDPD256 [3] x)
+	for {
+		x := v_0
+		v.reset(OpAMD64VROUNDPD256)
+		v.AuxInt = int8ToAuxInt(3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat32x16 [a] x)
+	// result: (VRNDSCALEPS512 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS512)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat32x4 [a] x)
+	// result: (VRNDSCALEPS128 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS128)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat32x8 [a] x)
+	// result: (VRNDSCALEPS256 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS256)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat64x2 [a] x)
+	// result: (VRNDSCALEPD128 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD128)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat64x4 [a] x)
+	// result: (VRNDSCALEPD256 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD256)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncSuppressExceptionWithPrecisionFloat64x8 [a] x)
+	// result: (VRNDSCALEPD512 [a+11] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD512)
+		v.AuxInt = int8ToAuxInt(a + 11)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat32x16 [a] x)
+	// result: (VRNDSCALEPS512 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS512)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat32x4 [a] x)
+	// result: (VRNDSCALEPS128 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS128)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat32x8 [a] x)
+	// result: (VRNDSCALEPS256 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPS256)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat64x2 [a] x)
+	// result: (VRNDSCALEPD128 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD128)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat64x4 [a] x)
+	// result: (VRNDSCALEPD256 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD256)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (TruncWithPrecisionFloat64x8 [a] x)
+	// result: (VRNDSCALEPD512 [a+3] x)
+	for {
+		a := auxIntToInt8(v.AuxInt)
+		x := v_0
+		v.reset(OpAMD64VRNDSCALEPD512)
+		v.AuxInt = int8ToAuxInt(a + 3)
+		v.AddArg(x)
+		return true
+	}
+}
 func rewriteValueAMD64_OpZero(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
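All of the masked rewrites above share one shape: the generic Masked* op carries the mask as an ordinary vector value, and the lowering wraps it in a VPMOVVec<W>x<N>ToM conversion to a mask register before the AVX-512 masked instruction consumes it. In simdAMD64.rules terms, the generator presumably emits rules of roughly this form (reconstructed from the generated functions, not quoted from CL 678195):

	(MaskedCeilWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
	(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))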
addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64) @@ -87,6 +103,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, 
archFamilies . addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x16.Xor", opLen2(ssa.OpXorFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.And", opLen2(ssa.OpAndFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.AndNot", opLen2(ssa.OpAndNotFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64) @@ -110,6 +127,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x4.Xor", opLen2(ssa.OpXorFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.And", opLen2(ssa.OpAndFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.AndNot", opLen2(ssa.OpAndNotFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64) @@ -133,6 +151,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x8.Xor", opLen2(ssa.OpXorFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Add", opLen2(ssa.OpAddFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64) @@ -156,6 +175,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.Sub", opLen2(ssa.OpSubFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x2.Xor", opLen2(ssa.OpXorFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Add", opLen2(ssa.OpAddFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.AddSub", opLen2(ssa.OpAddSubFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.And", opLen2(ssa.OpAndFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.AndNot", opLen2(ssa.OpAndNotFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64) @@ -1083,6 +1103,198 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
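For reference, a scalar model of what the AddSub intrinsics registered above compute per lane (a sketch of VADDSUBPS/VADDSUBPD semantics; the helper name is illustrative, not part of this CL):

	// addSubRef subtracts in even lanes and adds in odd lanes,
	// matching the alternating behavior of VADDSUBPS/VADDSUBPD.
	func addSubRef(x, y []float64) []float64 {
		out := make([]float64, len(x))
		for i := range x {
			if i%2 == 0 {
				out[i] = x[i] - y[i] // even lane: subtract
			} else {
				out[i] = x[i] + y[i] // odd lane: add
			}
		}
		return out
	}
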
addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", 
opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, 
types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + 
addF(simdPackage, "Float64x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), 
sys.AMD64) + addF(simdPackage, "Float64x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", 
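The Masked* registrations that follow take one extra SSA argument, a per-lane mask, hence opLen2Imm8 rather than opLen1Imm8. A scalar sketch of the gating, shown for the truncating VRNDSCALE mode and leaving the inactive-lane policy (merge vs. zero) to the instruction encoding (names here are illustrative):

	import "math"

	// maskedTruncRef rounds active lanes to frac fraction bits,
	// i.e. 2^-frac * trunc(2^frac * x), as VRNDSCALE defines it.
	func maskedTruncRef(x []float64, mask []bool, frac uint8) []float64 {
		out := make([]float64, len(x))
		scale := math.Ldexp(1, int(frac)) // 2^frac
		for i := range x {
			if mask[i] {
				out[i] = math.Trunc(x[i]*scale) / scale
			}
		}
		return out
	}
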
+	addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+	addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+	addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+	addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
 	addF(simdPackage, "Float32x16.AsFloat64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Float32x16.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
 	addF(simdPackage, "Float32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go
index 5dfb49cf2d..d433b67c9a 100644
--- a/src/simd/stubs_amd64.go
+++ b/src/simd/stubs_amd64.go
@@ -19,36 +19,84 @@ func (x Float32x4) ApproximateReciprocal() Float32x4
 
 // Asm: VRSQRTPS, CPU Feature: AVX
 func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Ceil() Float32x4
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Floor() Float32x4
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Round() Float32x4
+
 // Asm: VSQRTPS, CPU Feature: AVX
 func (x Float32x4) Sqrt() Float32x4
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Trunc() Float32x4
+
 // Asm: VRCP14PS, CPU Feature: AVX512EVEX
 func (x Float32x8) ApproximateReciprocal() Float32x8
 
 // Asm: VRSQRTPS, CPU Feature: AVX
 func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Ceil() Float32x8
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Floor() Float32x8
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Round() Float32x8
+
 // Asm: VSQRTPS, CPU Feature: AVX
 func (x Float32x8) Sqrt() Float32x8
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Trunc() Float32x8
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x2) ApproximateReciprocal() Float64x2
 
 // Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
 func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Ceil() Float64x2
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Floor() Float64x2
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Round() Float64x2
+
 // Asm: VSQRTPD, CPU Feature: AVX
 func (x Float64x2) Sqrt() Float64x2
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Trunc() Float64x2
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x4) ApproximateReciprocal() Float64x4
 
 // Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
 func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Ceil() Float64x4
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Floor() Float64x4
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Round() Float64x4
+
 // Asm: VSQRTPD, CPU Feature: AVX
 func (x Float64x4) Sqrt() Float64x4
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Trunc() Float64x4
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x8) ApproximateReciprocal() Float64x8
 
@@ -246,6 +294,9 @@ func (x Float32x16) Xor(y Float32x16) Float32x16
 // Asm: VADDPS, CPU Feature: AVX
 func (x Float32x4) Add(y Float32x4) Float32x4
 
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x4) AddSub(y Float32x4) Float32x4
+
 // Asm: VANDPS, CPU Feature: AVX
 func (x Float32x4) And(y Float32x4) Float32x4
 
@@ -333,6 +384,9 @@ func (x Float32x4) Xor(y Float32x4) Float32x4
 // Asm: VADDPS, CPU Feature: AVX
 func (x Float32x8) Add(y Float32x8) Float32x8
 
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x8) AddSub(y Float32x8) Float32x8
+
 // Asm: VANDPS, CPU Feature: AVX
 func (x Float32x8) And(y Float32x8) Float32x8
 
@@ -420,6 +474,9 @@ func (x Float32x8) Xor(y Float32x8) Float32x8
 // Asm: VADDPD, CPU Feature: AVX
 func (x Float64x2) Add(y Float64x2) Float64x2
 
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x2) AddSub(y Float64x2) Float64x2
+
 // Asm: VANDPD, CPU Feature: AVX
 func (x Float64x2) And(y Float64x2) Float64x2
 
@@ -507,6 +564,9 @@ func (x Float64x2) Xor(y Float64x2) Float64x2
 // Asm: VADDPD, CPU Feature: AVX
 func (x Float64x4) Add(y Float64x4) Float64x4
 
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x4) AddSub(y Float64x4) Float64x4
+
 // Asm: VANDPD, CPU Feature: AVX
 func (x Float64x4) And(y Float64x4) Float64x4
 
@@ -4112,6 +4172,582 @@ func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
 // Asm: VPSUBB, CPU Feature: AVX512EVEX
 func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
 
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaskedTruncWithPrecision(imm uint8, y Mask64x2) Float64x2 + +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4 + +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8 + // Float64x8 converts from Float32x16 to Float64x8 func (from Float32x16) AsFloat64x8() (to Float64x8)
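
A note for reviewers skimming the generated stubs: the sketch below shows how one of these methods is intended to be called. Only the method names and signatures in the hunk above (e.g. Float64x4.MaskedTruncWithPrecision) come from this CL; the build tag, the import path, and how the vector and mask values are obtained are assumptions about the surrounding dev.simd API, not part of this patch.

//go:build goexperiment.simd

// Illustrative only; not part of the generated file.
package example

import "simd"

// truncLanes truncates the lanes of v selected by mask m, via the
// MaskedTruncWithPrecision stub added in this CL. The imm argument is
// the precision field of VRNDSCALEPD: 0 rounds toward zero to an
// integer, and larger values keep that many fraction bits. Lanes whose
// mask bit is clear follow the masked instruction's semantics as
// lowered by the compiler.
func truncLanes(v simd.Float64x4, m simd.Mask64x4) simd.Float64x4 {
	return v.MaskedTruncWithPrecision(0, m)
}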