From b2073fb9b9282c0f59861a137660f6a0782d7468 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 24 Dec 2024 10:18:48 +0000 Subject: [PATCH] [AArch64] Prefer SVE2.2 zeroing forms of certain instructions with an all-true predicate (#120595) When the predicate of a destructive operation is known to be all-true, for example fabs z0.s, p0/m, z1.s then the entire output register is written and we can use a zeroing (instead of a merging) form of the instruction, for example fabs z0.s, p0/z, z1.s thus eliminate the dependency on the input-output destination register without the need to insert a `movprfx`. This patch complements (and in the case of https://github.com/llvm/llvm-project/commit/2b3266c1701f315d7e89c81977800001563afacb, fixes a regression) the following: https://github.com/llvm/llvm-project/commit/7f4414b2a1a4d9f802a03f56894c406f0fe3e9a9 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (4/11) (https://github.com/llvm/llvm-project/pull/116830) https://github.com/llvm/llvm-project/commit/2474cf7ad123ea14308293a2237e3552cddb1136 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (3/11) (https://github.com/llvm/llvm-project/pull/116829) https://github.com/llvm/llvm-project/commit/6f285d31159501050de5563b1a844a3e1ac79a03 [AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/11) (https://github.com/llvm/llvm-project/pull/116828) https://github.com/llvm/llvm-project/commit/2b3266c1701f315d7e89c81977800001563afacb [AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (https://github.com/llvm/llvm-project/pull/116259) --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 - .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 - llvm/lib/Target/AArch64/SVEInstrFormats.td | 102 ++- .../CodeGen/AArch64/zeroing-forms-abs-neg.ll | 600 +++++++++++++++++- .../AArch64/zeroing-forms-fcvt-bfcvt.ll | 287 ++++++++- .../AArch64/zeroing-forms-fcvtlt-fcvtx.ll | 123 +++- .../CodeGen/AArch64/zeroing-forms-fcvtzsu.ll | 580 ++++++++++++++++- 8 files changed, 1577 insertions(+), 128 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index ff3ca8a24fc04a..6aa8cd4f0232ac 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -228,6 +228,8 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { return false; } + bool SelectAny(SDValue) { return true; } + bool SelectDupZero(SDValue N) { switch(N->getOpcode()) { case AArch64ISD::DUP: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b37f4a08755c5f..629098cda0c4e7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -381,9 +381,6 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">; def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">; -def UseUnaryUndefPseudos - : Predicate<"!(Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2()))">; - def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c8892de6474375..7dd6d49bf20227 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -675,14 +675,6 @@ let Predicates = [HasSVEorSME] 
in { defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>; defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>; - let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in { - defm FABS_ZPmZ : sve_fp_un_pred_arit_hsd; - defm FNEG_ZPmZ : sve_fp_un_pred_arit_hsd; - - defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd; - defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd; - } - foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in { // No dedicated instruction, so just clear the sign bit. def : Pat<(VT (fabs VT:$op)), diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index a831de878a9107..0ef862fc1a27cf 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -484,6 +484,7 @@ let Predicates = [HasSVEorSME] in { //===----------------------------------------------------------------------===// def SVEDup0 : ComplexPattern; def SVEDup0Undef : ComplexPattern; +def SVEAny : ComplexPattern; class SVE_1_Op_Pat @@ -504,10 +505,15 @@ multiclass SVE_1_Op_PassthruUndef_Pat; } -class SVE_1_Op_PassthruUndefZero_Pat - : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))), - (inst $Op1, $Op2)>; +multiclass SVE_1_Op_PassthruUndefZero_Pat { + let AddedComplexity = 1 in { + def : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))), + (inst $Op1, $Op2)>; + def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (vtd (SVEAny)))), + (inst $Op1, $Op2)>; + } +} // Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the // type of rounding. This is matched by timm0_1 in pattern below and ignored. @@ -576,10 +582,15 @@ multiclass SVE_3_Op_Undef_Pat; } -class SVE_3_Op_UndefZero_Pat - : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)), - (inst $Op1, $Op2)>; +multiclass SVE_3_Op_UndefZero_Pat { + let AddedComplexity = 1 in { + def : Pat<(vtd (op (vt1 (SVEDup0Undef)), vt2:$Op1, vt3:$Op2)), + (inst $Op1, $Op2)>; + def : Pat<(vtd (op (vt1 (SVEAny)), (vt2 (SVEAllActive:$Op2)), vt3:$Op3)), + (inst $Op2, $Op3)>; + } +} class SVE_4_Op_Pat { def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>; def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>; - def : SVE_3_Op_UndefZero_Pat(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; - def : SVE_3_Op_UndefZero_Pat(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; } multiclass sve2_fp_convert_down_narrow_z { @@ -3256,7 +3267,7 @@ class sve_fp_z2op_p_zd opc,string asm, RegisterOperand i_zprtype, multiclass sve_fp_z2op_p_zd { def _DtoS : sve_fp_z2op_p_zd<0b0001010, asm, ZPR64, ZPR32>; - def : SVE_3_Op_UndefZero_Pat(NAME # _DtoS)>; + defm : SVE_3_Op_UndefZero_Pat(NAME # _DtoS)>; } multiclass sve_fp_z2op_p_zd_hsd opc, string asm> { @@ -3273,7 +3284,7 @@ multiclass sve_fp_z2op_p_zd_frint opc, string asm> { multiclass sve_fp_z2op_p_zd_bfcvt { def NAME : sve_fp_z2op_p_zd<0b1001010, asm, ZPR32, ZPR16>; - def : SVE_3_Op_UndefZero_Pat(NAME)>; + defm : SVE_3_Op_UndefZero_Pat(NAME)>; } multiclass sve_fp_z2op_p_zd_d { @@ -3285,14 +3296,14 @@ multiclass sve_fp_z2op_p_zd_d; def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>; - def : SVE_3_Op_UndefZero_Pat(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; - def : SVE_3_Op_UndefZero_Pat(int_op # _i64f32), nxv2i64, 
nxv2i1, nxv4f32, !cast(NAME # _StoD)>; - def : SVE_3_Op_UndefZero_Pat(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; - def : SVE_3_Op_UndefZero_Pat(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast(NAME # _HtoD)>; + defm : SVE_3_Op_UndefZero_Pat(int_op # _i32f64), nxv4i32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; + defm : SVE_3_Op_UndefZero_Pat(int_op # _i64f32), nxv2i64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; + defm : SVE_3_Op_UndefZero_Pat(int_op # _i32f16), nxv4i32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; + defm : SVE_3_Op_UndefZero_Pat(int_op # _i64f16), nxv2i64, nxv2i1, nxv8f16, !cast(NAME # _HtoD)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _HtoH)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _StoS)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _DtoD)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _HtoH)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _StoS)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _DtoD)>; } multiclass sve_fp_z2op_p_zd_c { @@ -3319,12 +3330,12 @@ multiclass sve_fp_z2op_p_zd_b_0 { def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>; def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>; - def : SVE_3_Op_UndefZero_Pat(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast(NAME # _StoH)>; - def : SVE_3_Op_UndefZero_Pat(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast(NAME # _DtoH)>; - def : SVE_3_Op_UndefZero_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; - def : SVE_3_Op_UndefZero_Pat(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; - def : SVE_3_Op_UndefZero_Pat(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast(NAME # _HtoD)>; - def : SVE_3_Op_UndefZero_Pat(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast(NAME # _StoH)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f16f64), nxv8f16, nxv2i1, nxv2f64, !cast(NAME # _DtoH)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast(NAME # _DtoS)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast(NAME # _HtoS)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f64f16), nxv2f64, nxv2i1, nxv8f16, !cast(NAME # _HtoD)>; + defm : SVE_3_Op_UndefZero_Pat(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast(NAME # _StoD)>; } //===----------------------------------------------------------------------===// @@ -4842,6 +4853,16 @@ multiclass sve_int_un_pred_arit opc, string asm, def : SVE_1_Op_Passthru_Pat(NAME # _H)>; def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + + def _B_UNDEF : PredOneOpPassthruPseudo; + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _B_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_z opc, string asm, SDPatternOperator op> { @@ -4850,10 +4871,10 @@ multiclass sve_int_un_pred_arit_z opc, string asm, SDPatternOperator op> def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>; def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _B)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _D)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _B)>; + defm : 
SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_h opc, string asm, @@ -4967,6 +4988,17 @@ multiclass sve_int_un_pred_arit_bitwise_fp opc, string asm, def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _S)>; def : SVE_1_Op_Passthru_Pat(NAME # _D)>; + + def _H_UNDEF : PredOneOpPassthruPseudo; + def _S_UNDEF : PredOneOpPassthruPseudo; + def _D_UNDEF : PredOneOpPassthruPseudo; + + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _H_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _S_UNDEF)>; + defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } multiclass sve_int_un_pred_arit_bitwise_fp_z opc, string asm, SDPatternOperator op> { @@ -4974,12 +5006,12 @@ multiclass sve_int_un_pred_arit_bitwise_fp_z opc, string asm, SDPatternO def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>; def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; - def : SVE_1_Op_PassthruUndefZero_Pat(NAME # _D)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _H)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _S)>; + defm : SVE_1_Op_PassthruUndefZero_Pat(NAME # _D)>; } multiclass sve_fp_un_pred_arit_hsd { diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll index 1caee994220f05..510d4576646f12 100644 --- a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll +++ b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll @@ -18,7 +18,7 @@ define @test_svabs_f64_x_1( %pg, @llvm.aarch64.sve.fabs.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv2f64( poison, %pg, %x) ret %0 } @@ -34,7 +34,7 @@ define @test_svabs_f64_x_2( %pg, double % ; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fabs.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv2f64( poison, %pg, %x) ret %0 } @@ -65,7 +65,7 @@ define @test_svabs_f32_x_1( %pg, @llvm.aarch64.sve.fabs.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv4f32( poison, %pg, %x) ret %0 } @@ -81,7 +81,7 @@ define @test_svabs_f32_x_2( %pg, double %z ; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fabs.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv4f32( poison, %pg, %x) ret %0 } @@ -112,7 +112,7 @@ define @test_svabs_f16_x_1( %pg, @llvm.aarch64.sve.fabs.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( poison, %pg, %x) ret %0 } @@ -128,7 +128,7 @@ define @test_svabs_f16_x_2( %pg, double %z0 ; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( poison, %pg, %x) ret %0 } @@ -159,7 +159,7 @@ define @test_svabs_s8_x_1( %pg, @llvm.aarch64.sve.abs.nxv16i8( undef, 
%pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( poison, %pg, %x) ret %0 } @@ -175,8 +175,8 @@ define @test_svabs_s8_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b ; CHECK-2p2-NEXT: ret entry: - %1 = tail call @llvm.aarch64.sve.abs.nxv16i8( undef, %pg, %x) - ret %1 + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( poison, %pg, %x) + ret %0 } define @test_svabs_s8_z( %pg, double %z0, %x) { @@ -191,8 +191,8 @@ define @test_svabs_s8_z( %pg, double %z0, < ; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b ; CHECK-2p2-NEXT: ret entry: - %1 = tail call @llvm.aarch64.sve.abs.nxv16i8( zeroinitializer, %pg, %x) - ret %1 + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( zeroinitializer, %pg, %x) + ret %0 } define @test_svabs_s16_x_1( %pg, %x) { @@ -206,7 +206,7 @@ define @test_svabs_s16_x_1( %pg, @llvm.aarch64.sve.abs.nxv8i16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv8i16( poison, %pg, %x) ret %0 } @@ -222,7 +222,7 @@ define @test_svabs_s16_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.abs.nxv8i16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv8i16( poison, %pg, %x) ret %0 } @@ -253,7 +253,7 @@ define @test_svabs_s32_x_1( %pg, @llvm.aarch64.sve.abs.nxv4i32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv4i32( poison, %pg, %x) ret %0 } @@ -269,7 +269,7 @@ define @test_svabs_s32_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.abs.nxv4i32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv4i32( poison, %pg, %x) ret %0 } @@ -300,7 +300,7 @@ define @test_svabs_s64_x_1( %pg, @llvm.aarch64.sve.abs.nxv2i64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv2i64( poison, %pg, %x) ret %0 } @@ -316,7 +316,7 @@ define @test_svabs_s64_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.abs.nxv2i64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.abs.nxv2i64( poison, %pg, %x) ret %0 } @@ -347,7 +347,7 @@ define @test_svneg_f64_x_1( %pg, @llvm.aarch64.sve.fneg.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv2f64( poison, %pg, %x) ret %0 } @@ -363,7 +363,7 @@ define @test_svneg_f64_x_2( %pg, double % ; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fneg.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv2f64( poison, %pg, %x) ret %0 } @@ -394,7 +394,7 @@ define @test_svneg_f32_x_1( %pg, @llvm.aarch64.sve.fneg.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv4f32( poison, %pg, %x) ret %0 } @@ -410,7 +410,7 @@ define @test_svneg_f32_x_2( %pg, double %z ; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fneg.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv4f32( poison, %pg, %x) ret %0 } @@ -441,7 +441,7 @@ define @test_svneg_f16_x_1( %pg, @llvm.aarch64.sve.fneg.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( poison, %pg, %x) ret %0 } @@ -457,7 +457,7 @@ define @test_svneg_f16_x_2( %pg, double %z0 ; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( poison, %pg, %x) ret %0 } @@ -488,7 +488,7 @@ define @test_svneg_s8_x_1( %pg, @llvm.aarch64.sve.neg.nxv16i8( undef, %pg, %x) + %0 = tail call 
@llvm.aarch64.sve.neg.nxv16i8( poison, %pg, %x) ret %0 } @@ -504,8 +504,8 @@ define @test_svneg_s8_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b ; CHECK-2p2-NEXT: ret entry: - %1 = tail call @llvm.aarch64.sve.neg.nxv16i8( undef, %pg, %x) - ret %1 + %0 = tail call @llvm.aarch64.sve.neg.nxv16i8( poison, %pg, %x) + ret %0 } define @test_svneg_s8_z( %pg, double %z0, %x) { @@ -520,8 +520,8 @@ define @test_svneg_s8_z( %pg, double %z0, < ; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b ; CHECK-2p2-NEXT: ret entry: - %1 = tail call @llvm.aarch64.sve.neg.nxv16i8( zeroinitializer, %pg, %x) - ret %1 + %0 = tail call @llvm.aarch64.sve.neg.nxv16i8( zeroinitializer, %pg, %x) + ret %0 } define @test_svneg_s16_x_1( %pg, %x) { @@ -535,7 +535,7 @@ define @test_svneg_s16_x_1( %pg, @llvm.aarch64.sve.neg.nxv8i16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv8i16( poison, %pg, %x) ret %0 } @@ -551,7 +551,7 @@ define @test_svneg_s16_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.neg.nxv8i16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv8i16( poison, %pg, %x) ret %0 } @@ -582,7 +582,7 @@ define @test_svneg_s32_x_1( %pg, @llvm.aarch64.sve.neg.nxv4i32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv4i32( poison, %pg, %x) ret %0 } @@ -598,7 +598,7 @@ define @test_svneg_s32_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.neg.nxv4i32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv4i32( poison, %pg, %x) ret %0 } @@ -629,7 +629,7 @@ define @test_svneg_s64_x_1( %pg, @llvm.aarch64.sve.neg.nxv2i64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( poison, %pg, %x) ret %0 } @@ -645,7 +645,7 @@ define @test_svneg_s64_x_2( %pg, double %z0, ; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( poison, %pg, %x) ret %0 } @@ -664,3 +664,535 @@ entry: %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( zeroinitializer, %pg, %x) ret %0 } + +define @test_svfabs_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfabs_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.d, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv2f64( poison, %pg, %x) + ret %0 +} + +define @test_svfabs_f64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfabs_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fabs z0.d, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fabs z0.d, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv2f64( %x, %pg, %y) + ret %0 +} + +define @test_svfabs_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfabs_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.s, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f32_ptrue_u: +; 
CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv4f32( poison, %pg, %x) + ret %0 +} + +define @test_svfabs_f32_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfabs_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fabs z0.s, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fabs z0.s, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv4f32( %x, %pg, %y) + ret %0 +} + +define @test_svfabs_f16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfabs_f16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fabs z0.h, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( poison, %pg, %x) + ret %0 +} + +define @test_svfabs_f16_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfabs_f16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fabs z0.h, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfabs_f16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: fabs z0.h, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fabs.nxv8f16( %x, %pg, %y) + ret %0 +} + +define @test_svabs_s8_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svabs_s8_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.b, p0/m, z1.b +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s8_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.b +; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( poison, %pg, %x) + ret %0 +} + +define @test_svabs_s8_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svabs_s8_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: abs z0.b, p0/m, z2.b +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s8_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.b +; CHECK-2p2-NEXT: abs z0.b, p0/z, z2.b +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv16i8( %x, %pg, %y) + ret %0 +} + +define @test_svabs_s16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svabs_s16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.h, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nx84i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv8i16( poison, %pg, %x) + ret %0 +} + +define 
@test_svabs_s16_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svabs_s16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: abs z0.h, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: abs z0.h, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv8i16( %x, %pg, %y) + ret %0 +} + +define @test_svabs_s32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svabs_s32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.s, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: abs z0.s, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv4i32( poison, %pg, %x) + ret %0 +} + +define @test_svabs_s32_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svabs_s32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: abs z0.s, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: abs z0.s, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv4i32( %x, %pg, %y) + ret %0 +} + +define @test_svabs_s64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svabs_s64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: abs z0.d, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: abs z0.d, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv2i64( poison, %pg, %x) + ret %0 +} + +define @test_svabs_s64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svabs_s64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: abs z0.d, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svabs_s64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: abs z0.d, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.abs.nxv2i64( %x, %pg, %y) + ret %0 +} + +define @test_svfneg_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfneg_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.d, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fneg z0.d, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv2f64( poison, %pg, %x) + ret %0 +} + +define @test_svfneg_f64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfneg_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fneg z0.d, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f64_ptrue: +; CHECK-2p2: // %bb.0: // 
%entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fneg z0.d, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv2f64( %x, %pg, %y) + ret %0 +} + +define @test_svfneg_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfneg_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.s, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fneg z0.s, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv4f32( poison, %pg, %x) + ret %0 +} + +define @test_svfneg_f32_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfneg_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fneg z0.s, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fneg z0.s, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv4f32( %x, %pg, %y) + ret %0 +} + +define @test_svfneg_f16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svfneg_f16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fneg z0.h, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: fneg z0.h, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( poison, %pg, %x) + ret %0 +} + +define @test_svfneg_f16_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svfneg_f16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fneg z0.h, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svfneg_f16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: fneg z0.h, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fneg.nxv8f16( %x, %pg, %y) + ret %0 +} + +define @test_svneg_s8_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svneg_s8_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.b, p0/m, z1.b +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s8_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.b +; CHECK-2p2-NEXT: neg z0.b, p0/z, z1.b +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv16i8( poison, %pg, %x) + ret %0 +} + +define @test_svneg_s8_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svneg_s8_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: neg z0.b, p0/m, z2.b +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s8_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.b +; CHECK-2p2-NEXT: neg z0.b, p0/z, z2.b +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv16i8( %x, %pg, %y) + ret %0 +} + +define @test_svneg_s16_ptrue_u(double %z0, %x) { 
+; CHECK-LABEL: test_svneg_s16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.h, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: neg z0.h, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv8i16( poison, %pg, %x) + ret %0 +} + +define @test_svneg_s16_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svneg_s16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: neg z0.h, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.h +; CHECK-2p2-NEXT: neg z0.h, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv8i16( %x, %pg, %y) + ret %0 +} + +define @test_svneg_s32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svneg_s32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.s, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: neg z0.s, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv4i32( poison, %pg, %x) + ret %0 +} + +define @test_svneg_s32_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svneg_s32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: neg z0.s, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: neg z0.s, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv4i32( %x, %pg, %y) + ret %0 +} + +define @test_svneg_s64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svneg_s64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: neg z0.d, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: neg z0.d, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( poison, %pg, %x) + ret %0 +} + +define @test_svneg_s64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svneg_s64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: neg z0.d, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svneg_s64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: neg z0.d, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.neg.nxv2i64( %x, %pg, %y) + ret %0 +} diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll index cf9ac49ca7b236..855bf9a3b3c491 100644 --- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll +++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvt-bfcvt.ll @@ -18,7 +18,7 @@ define @test_svcvt_f16_f32_x_1( %pg, 
@llvm.aarch64.sve.fcvt.f16f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( poison, %pg, %x) ret %0 } @@ -33,7 +33,7 @@ define @test_svcvt_f16_f32_x_2( %pg, double ; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( poison, %pg, %x) ret %0 } @@ -64,7 +64,7 @@ define @test_svcvt_bf16_f32_x_1( %pg, @llvm.aarch64.sve.fcvt.bf16f32.v2( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( poison, %pg, %x) ret %0 } @@ -79,7 +79,7 @@ define @test_svcvt_bf16_f32_x_2( %pg, dou ; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( poison, %pg, %x) ret %0 } @@ -110,7 +110,7 @@ define @test_svcvt_f16_f64_x_1( %pg, @llvm.aarch64.sve.fcvt.f16f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( poison, %pg, %x) ret %0 } @@ -125,7 +125,7 @@ define @test_svcvt_f16_f64_x_2( %pg, double ; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( poison, %pg, %x) ret %0 } @@ -156,7 +156,7 @@ define @test_svcvt_f32_f64_x_1( %pg, @llvm.aarch64.sve.fcvt.f32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f64( poison, %pg, %x) ret %0 } @@ -171,7 +171,7 @@ define @test_svcvt_f32_f64_x_2( %pg, doubl ; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f64( poison, %pg, %x) ret %0 } @@ -202,7 +202,7 @@ define @test_svcvt_f32_f16_x_1( %pg, @llvm.aarch64.sve.fcvt.f32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f16( poison, %pg, %x) ret %0 } @@ -217,7 +217,7 @@ define @test_svcvt_f32_f16_x_2( %pg, doubl ; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f16( poison, %pg, %x) ret %0 } @@ -248,7 +248,7 @@ define @test_svcvt_f64_f16_x_1( %pg, @llvm.aarch64.sve.fcvt.f64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f16( poison, %pg, %x) ret %0 } @@ -263,7 +263,7 @@ define @test_svcvt_f64_f16_x_2( %pg, doub ; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f16( poison, %pg, %x) ret %0 } @@ -294,7 +294,7 @@ define @test_svcvt_f64_f32_x_1( %pg, @llvm.aarch64.sve.fcvt.f64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( poison, %pg, %x) ret %0 } @@ -309,7 +309,7 @@ define @test_svcvt_f64_f32_x_2( %pg, doub ; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( poison, %pg, %x) ret %0 } @@ -328,3 +328,262 @@ entry: %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( zeroinitializer, %pg, %x) ret %0 } + +define @test_svcvt_f16_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f16_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvt 
z0.h, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f16_f32_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f16_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f16_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f32( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_bf16_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_bf16_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_bf16_f32_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_bf16_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: bfcvt z0.h, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_bf16_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: bfcvt z0.h, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.bf16f32.v2( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_f16_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f16_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f16_f64_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f16_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.h, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f16_f64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.h, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f16f64( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_f32_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f32_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f64( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f32_f64_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f32_f64_ptrue: +; CHECK: // 
%bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.s, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f32_f64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f64( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_f32_f16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f32_f16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f16( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f32_f16_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f32_f16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f32_f16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvt z0.s, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f32f16( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_f64_f16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f64_f16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f16( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f64_f16_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f64_f16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f64_f16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f16( %x, %pg, %y) + ret %0 +} + +define @test_svcvt_f64_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvt_f64_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( poison, %pg, %x) + ret %0 +} + +define @test_svcvt_f64_f32_ptrue(double %z0, %x, %y ) { +; CHECK-LABEL: test_svcvt_f64_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvt z0.d, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvt_f64_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvt z0.d, p0/z, z2.s +; CHECK-2p2-NEXT: ret 
+entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvt.f64f32( %x, %pg, %y) + ret %0 +} diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll index 60879b1529230f..c7431e11c21ca3 100644 --- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll +++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtlt-fcvtx.ll @@ -18,7 +18,7 @@ define @test_svcvtlt_f32_f16_x_1( %pg, @llvm.aarch64.sve.fcvtlt.f32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( poison, %pg, %x) ret %0 } @@ -33,7 +33,7 @@ define @test_svcvtlt_f32_f16_x_2( %pg, dou ; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( poison, %pg, %x) ret %0 } @@ -64,7 +64,7 @@ define @test_svcvtlt_f64_f32_x_1( %pg, @llvm.aarch64.sve.fcvtlt.f64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( poison, %pg, %x) ret %0 } @@ -79,7 +79,7 @@ define @test_svcvtlt_f64_f32_x_2( %pg, do ; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( poison, %pg, %x) ret %0 } @@ -110,7 +110,7 @@ define @test_svcvtx_f32_f64_x_1( %pg, @llvm.aarch64.sve.fcvtx.f32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( poison, %pg, %x) ret %0 } @@ -125,7 +125,7 @@ define @test_svcvtx_f32_f64_x_2( %pg, doub ; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( poison, %pg, %x) ret %0 } @@ -144,3 +144,114 @@ entry: %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( zeroinitializer, %pg, %x) ret %0 } + +define @test_svcvtlt_f32_f16_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtlt z0.s, p0/m, z1.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z1.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( poison, %pg, %x) + ret %0 +} + +define @test_svcvtlt_f32_f16_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svcvtlt_f32_f16_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtlt z0.s, p0/m, z2.h +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtlt_f32_f16_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.s +; CHECK-2p2-NEXT: fcvtlt z0.s, p0/z, z2.h +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f32f16( %x, %pg, %y) + ret %0 +} + +define @test_svcvtlt_f64_f32_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtlt z0.d, p0/m, z1.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z1.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( poison, %pg, %x) + ret %0 +} + 
+define @test_svcvtlt_f64_f32_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svcvtlt_f64_f32_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtlt z0.d, p0/m, z2.s +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtlt_f64_f32_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtlt z0.d, p0/z, z2.s +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtlt.f64f32( %x, %pg, %y) + ret %0 +} + +define @test_svcvtx_f32_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_svcvtx_f32_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtx z0.s, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( poison, %pg, %x) + ret %0 +} + +define @test_svcvtx_f32_f64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_svcvtx_f32_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtx z0.s, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_svcvtx_f32_f64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtx z0.s, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtx.f32f64( %x, %pg, %y) + ret %0 +} diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll index b8b36d390330af..7259502bf44002 100644 --- a/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll +++ b/llvm/test/CodeGen/AArch64/zeroing-forms-fcvtzsu.ll @@ -18,7 +18,7 @@ define @test_fcvtzs_s32_f64_x_1( %pg, @llvm.aarch64.sve.fcvtzs.i32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( poison, %pg, %x) ret %0 } @@ -33,7 +33,7 @@ define @test_fcvtzs_s32_f64_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( poison, %pg, %x) ret %0 } @@ -64,7 +64,7 @@ define @test_fcvtzs_s64_f32_x_1( %pg, @llvm.aarch64.sve.fcvtzs.i64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f32( poison, %pg, %x) ret %0 } @@ -79,7 +79,7 @@ define @test_fcvtzs_s64_f32_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f32( poison, %pg, %x) ret %0 } @@ -110,7 +110,7 @@ define @test_fcvtzs_s32_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzs.i32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f16( poison, %pg, %x) ret %0 } @@ -125,7 +125,7 @@ define @test_fcvtzs_s32_f16_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f16( poison, %pg, %x) ret %0 } @@ -156,7 +156,7 @@ define @test_fcvtzs_s64_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzs.i64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f16( poison, %pg, %x) ret %0 } @@ -171,7 +171,7 @@ define @test_fcvtzs_s64_f16_x_2( %pg, double ; 
CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i64f16( poison, %pg, %x) ret %0 } @@ -202,7 +202,7 @@ define @test_fcvtzu_u32_f64_x_1( %pg, @llvm.aarch64.sve.fcvtzu.i32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f64( poison, %pg, %x) ret %0 } @@ -217,7 +217,7 @@ define @test_fcvtzu_u32_f64_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f64( poison, %pg, %x) ret %0 } @@ -248,7 +248,7 @@ define @test_fcvtzu_u64_f32_x_1( %pg, @llvm.aarch64.sve.fcvtzu.i64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f32( poison, %pg, %x) ret %0 } @@ -263,7 +263,7 @@ define @test_fcvtzu_u64_f32_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f32( poison, %pg, %x) ret %0 } @@ -294,7 +294,7 @@ define @test_fcvtzu_u32_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzu.i32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f16( poison, %pg, %x) ret %0 } @@ -309,7 +309,7 @@ define @test_fcvtzu_u32_f16_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f16( poison, %pg, %x) ret %0 } @@ -340,7 +340,7 @@ define @test_fcvtzu_u64_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzu.i64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f16( poison, %pg, %x) ret %0 } @@ -355,7 +355,7 @@ define @test_fcvtzu_u64_f16_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i64f16( poison, %pg, %x) ret %0 } @@ -387,7 +387,7 @@ define @test_svcvt_s16_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( poison, %pg, %x) ret %0 } @@ -403,7 +403,7 @@ define @test_svcvt_s16_f16_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16( poison, %pg, %x) ret %0 } @@ -434,7 +434,7 @@ define @test_svcvt_u16_f16_x_1( %pg, @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( poison, %pg, %x) ret %0 } @@ -450,7 +450,7 @@ define @test_svcvt_u16_f16_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16( poison, %pg, %x) ret %0 } @@ -481,7 +481,7 @@ define @test_svcvt_s32_f32_x_1( %pg, @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( poison, %pg, %x) ret %0 } @@ -497,7 +497,7 @@ define @test_svcvt_s32_f32_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32( poison, %pg, %x) ret %0 } @@ -528,7 
+528,7 @@ define @test_svcvt_u32_f32_x_1( %pg, @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( poison, %pg, %x) ret %0 } @@ -544,7 +544,7 @@ define @test_svcvt_u32_f32_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32( poison, %pg, %x) ret %0 } @@ -575,7 +575,7 @@ define @test_svcvt_s64_f64_x_1( %pg, @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( poison, %pg, %x) ret %0 } @@ -591,7 +591,7 @@ define @test_svcvt_s64_f64_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64( poison, %pg, %x) ret %0 } @@ -622,7 +622,7 @@ define @test_svcvt_u64_f64_x_1( %pg, @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( poison, %pg, %x) ret %0 } @@ -638,7 +638,7 @@ define @test_svcvt_u64_f64_x_2( %pg, double ; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d ; CHECK-2p2-NEXT: ret entry: - %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( undef, %pg, %x) + %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( poison, %pg, %x) ret %0 } @@ -657,3 +657,527 @@ entry: %0 = tail call @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64( zeroinitializer, %pg, %x) ret %0 } + +define @test_fcvtzs_i32_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( poison, %pg, %x) + ret %0 +} + +define @test_fcvtzs_i32_f64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_fcvtzs_i32_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_fcvtzs_i32_f64_ptrue: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtzs.i32f64( %x, %pg, %y) + ret %0 +} + +define @test_fcvtzu_i32_f64_ptrue_u(double %z0, %x) { +; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue_u: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue_u: +; CHECK-2p2: // %bb.0: // %entry +; CHECK-2p2-NEXT: ptrue p0.d +; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.d +; CHECK-2p2-NEXT: ret +entry: + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %0 = tail call @llvm.aarch64.sve.fcvtzu.i32f64( poison, %pg, %x) + ret %0 +} + +define @test_fcvtzu_i32_f64_ptrue(double %z0, %x, %y) { +; CHECK-LABEL: test_fcvtzu_i32_f64_ptrue: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.d +; CHECK-NEXT: ret +; +; CHECK-2p2-LABEL: test_fcvtzu_i32_f64_ptrue: +; CHECK-2p2: // %bb.0: // 
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f32_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 4 x float> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f16_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f16_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzs_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzs z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue_u(double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> poison, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_fcvtzu_i16_f16_ptrue(double %z0, <vscale x 8 x i16> %x, <vscale x 8 x half> %y) {
+; CHECK-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z2.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i16_f16_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.h
+; CHECK-2p2-NEXT: fcvtzu z0.h, p0/z, z2.h
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> %x, <vscale x 8 x i1> %pg, <vscale x 8 x half> %y)
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzs_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzs z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue_u(double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> poison, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_fcvtzu_i32_f32_ptrue(double %z0, <vscale x 4 x i32> %x, <vscale x 4 x float> %y) {
+; CHECK-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i32_f32_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.s
+; CHECK-2p2-NEXT: fcvtzu z0.s, p0/z, z2.s
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> %x, <vscale x 4 x i1> %pg, <vscale x 4 x float> %y)
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzs_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzs_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzs z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue_u(double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue_u:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> poison, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 2 x i64> @test_fcvtzu_i64_f64_ptrue(double %z0, <vscale x 2 x i64> %x, <vscale x 2 x double> %y) {
+; CHECK-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_fcvtzu_i64_f64_ptrue:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: ptrue p0.d
+; CHECK-2p2-NEXT: fcvtzu z0.d, p0/z, z2.d
+; CHECK-2p2-NEXT: ret
+entry:
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %pg, <vscale x 2 x double> %y)
+  ret <vscale x 2 x i64> %0
+}
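
For reference, the pattern these tests pin down can also be reached from C via the ACLE intrinsics. A minimal sketch, not part of the patch (the function name is illustrative, and the exact -march extension spelling is an assumption):

    // With an all-true predicate, the _x intrinsic leaves inactive lanes
    // unspecified, so the backend is free to select the SVE2.2 zeroing form
    // (fcvtzs z0.s, p0/z, z0.d) with no movprfx.
    #include <arm_sve.h>

    svint32_t cvt_all_active(svfloat64_t x) {
      return svcvt_s32_f64_x(svptrue_b64(), x);
    }

Built with something like clang -O2 --target=aarch64-linux-gnu -march=armv9-a+sve2p2, this should produce the p0/z form that the CHECK-2p2 lines above expect.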