-rw-r--r--  include/llvm/IR/IntrinsicsAArch64.td          | 13
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td        | 25
-rw-r--r--  test/CodeGen/AArch64/neon-scalar-shift-imm.ll | 36
3 files changed, 38 insertions, 36 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 4d54a23..782fea2 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -232,10 +232,6 @@ class Neon_3Arg_ShiftImm_Intrinsic
 def int_aarch64_neon_vshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
 def int_aarch64_neon_vshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
 
-// Scalar Rounding Shift Right (Immediate)
-def int_aarch64_neon_vrshrds_n : Neon_2Arg_ShiftImm_Intrinsic;
-def int_aarch64_neon_vrshrdu_n : Neon_2Arg_ShiftImm_Intrinsic;
-
 // Scalar Shift Right and Accumulate (Immediate)
 def int_aarch64_neon_vsrads_n : Neon_3Arg_ShiftImm_Intrinsic;
 def int_aarch64_neon_vsradu_n : Neon_3Arg_ShiftImm_Intrinsic;
@@ -251,15 +247,6 @@ def int_aarch64_neon_vshld_n : Neon_2Arg_ShiftImm_Intrinsic;
 def int_aarch64_neon_vqshls_n : Neon_N2V_Intrinsic;
 def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
 
-// Scalar Signed Saturating Shift Left Unsigned (Immediate)
-def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
-
-// Shift Right And Insert (Immediate)
-def int_aarch64_neon_vsrid_n : Neon_3Arg_ShiftImm_Intrinsic;
-
-// Shift Left And Insert (Immediate)
-def int_aarch64_neon_vslid_n : Neon_3Arg_ShiftImm_Intrinsic;
-
 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
 def int_aarch64_neon_vcvtf32_n_s32 :
   Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 83bb1fa..c1b43a8 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4088,6 +4088,11 @@ multiclass Neon_ScalarShiftImm_D_size_patterns<SDPatternOperator opnode,
             (INSTD FPR64:$Rn, imm:$Imm)>;
 }
 
+class Neon_ScalarShiftImm_arm_D_size_patterns<SDPatternOperator opnode,
+                                              Instruction INSTD>
+  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 (Neon_vdup (i32 imm:$Imm))))),
+        (INSTD FPR64:$Rn, imm:$Imm)>;
+
 multiclass Neon_ScalarShiftImm_BHSD_size_patterns<SDPatternOperator opnode,
                                                   Instruction INSTB,
                                                   Instruction INSTH,
@@ -4143,18 +4148,22 @@ multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
 // Scalar Signed Shift Right (Immediate)
 defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<sra, SSHRddi>;
 
 // Scalar Unsigned Shift Right (Immediate)
 defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<srl, USHRddi>;
 
 // Scalar Signed Rounding Shift Right (Immediate)
 defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
-defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrds_n, SRSHRddi>;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
 
 // Scalar Unigned Rounding Shift Right (Immediate)
 defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
-defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vrshrdu_n, URSHRddi>;
+defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
 
 // Scalar Signed Shift Right and Accumulate (Immediate)
 def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
@@ -4175,32 +4184,38 @@ def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vrsradu_n, URSR
 // Scalar Shift Left (Immediate)
 defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
 defm : Neon_ScalarShiftImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns<shl, SHLddi>;
 
 // Signed Saturating Shift Left (Immediate)
 defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
                                               SQSHLbbi, SQSHLhhi,
                                               SQSHLssi, SQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
 
 // Unsigned Saturating Shift Left (Immediate)
 defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
 defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
                                               UQSHLbbi, UQSHLhhi,
                                               UQSHLssi, UQSHLddi>;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
 
 // Signed Saturating Shift Left Unsigned (Immediate)
 defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
-defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vqshlus_n,
+defm : Neon_ScalarShiftImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
                                               SQSHLUbbi, SQSHLUhhi,
                                               SQSHLUssi, SQSHLUddi>;
 
 // Shift Right And Insert (Immediate)
 def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
-def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsrid_n, SRI>;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsri, SRI>;
 
 // Shift Left And Insert (Immediate)
 def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
-def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vslid_n, SLI>;
+def : Neon_ScalarShiftImm_accum_D_size_patterns<int_aarch64_neon_vsli, SLI>;
 
 // Signed Saturating Shift Right Narrow (Immediate)
 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
index 7e099a3..693db13 100644
--- a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
+++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
@@ -29,24 +29,24 @@ define i64 @test_vrshrd_n_s64(i64 %a) {
 ; CHECK: srshr {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsrshr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64> %vsrshr, i32 63)
+  %vsrshr1 = call <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64> %vsrshr, i32 63)
   %0 = extractelement <1 x i64> %vsrshr1, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vrshrds.n(<1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vsrshr.v1i64(<1 x i64>, i32)
 
 define i64 @test_vrshrd_n_u64(i64 %a) {
 ; CHECK: test_vrshrd_n_u64
 ; CHECK: urshr {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vurshr = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64> %vurshr, i32 63)
+  %vurshr1 = call <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64> %vurshr, i32 63)
   %0 = extractelement <1 x i64> %vurshr1, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vrshrdu.n(<1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vurshr.v1i64(<1 x i64>, i32)
 
 define i64 @test_vsrad_n_s64(i64 %a, i64 %b) {
 ; CHECK: test_vsrad_n_s64
@@ -223,48 +223,48 @@ define i8 @test_vqshlub_n_s8(i8 %a) {
 ; CHECK: sqshlu {{b[0-9]+}}, {{b[0-9]+}}, #7
 entry:
   %vsqshlu = insertelement <1 x i8> undef, i8 %a, i32 0
-  %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8> %vsqshlu, i32 7)
+  %vsqshlu1 = call <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8> %vsqshlu, i32 7)
   %0 = extractelement <1 x i8> %vsqshlu1, i32 0
   ret i8 %0
 }
 
-declare <1 x i8> @llvm.aarch64.neon.vqshlus.n.v1i8(<1 x i8>, i32)
+declare <1 x i8> @llvm.aarch64.neon.vsqshlu.v1i8(<1 x i8>, i32)
 
 define i16 @test_vqshluh_n_s16(i16 %a) {
 ; CHECK: test_vqshluh_n_s16
 ; CHECK: sqshlu {{h[0-9]+}}, {{h[0-9]+}}, #15
 entry:
   %vsqshlu = insertelement <1 x i16> undef, i16 %a, i32 0
-  %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16> %vsqshlu, i32 15)
+  %vsqshlu1 = call <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16> %vsqshlu, i32 15)
   %0 = extractelement <1 x i16> %vsqshlu1, i32 0
   ret i16 %0
 }
 
-declare <1 x i16> @llvm.aarch64.neon.vqshlus.n.v1i16(<1 x i16>, i32)
+declare <1 x i16> @llvm.aarch64.neon.vsqshlu.v1i16(<1 x i16>, i32)
 
 define i32 @test_vqshlus_n_s32(i32 %a) {
 ; CHECK: test_vqshlus_n_s32
 ; CHECK: sqshlu {{s[0-9]+}}, {{s[0-9]+}}, #31
 entry:
   %vsqshlu = insertelement <1 x i32> undef, i32 %a, i32 0
-  %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32> %vsqshlu, i32 31)
+  %vsqshlu1 = call <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32> %vsqshlu, i32 31)
   %0 = extractelement <1 x i32> %vsqshlu1, i32 0
   ret i32 %0
 }
 
-declare <1 x i32> @llvm.aarch64.neon.vqshlus.n.v1i32(<1 x i32>, i32)
+declare <1 x i32> @llvm.aarch64.neon.vsqshlu.v1i32(<1 x i32>, i32)
 
 define i64 @test_vqshlud_n_s64(i64 %a) {
 ; CHECK: test_vqshlud_n_s64
 ; CHECK: sqshlu {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsqshlu = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64> %vsqshlu, i32 63)
+  %vsqshlu1 = call <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64> %vsqshlu, i32 63)
   %0 = extractelement <1 x i64> %vsqshlu1, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vsqshlu.v1i64(<1 x i64>, i32)
 
 define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
 ; CHECK: test_vsrid_n_s64
@@ -272,12 +272,12 @@ define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
 entry:
   %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
   %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
   %0 = extractelement <1 x i64> %vsri2, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64>, <1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64>, <1 x i64>, i32)
 
 define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
 ; CHECK: test_vsrid_n_u64
@@ -285,7 +285,7 @@ define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
 entry:
   %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
   %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
   %0 = extractelement <1 x i64> %vsri2, i32 0
   ret i64 %0
 }
@@ -296,12 +296,12 @@ define i64 @test_vslid_n_s64(i64 %a, i64 %b) {
 entry:
   %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
   %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
   %0 = extractelement <1 x i64> %vsli2, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64>, <1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32)
 
 define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
 ; CHECK: test_vslid_n_u64
@@ -309,7 +309,7 @@ define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
 entry:
   %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
   %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
-  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
   %0 = extractelement <1 x i64> %vsli2, i32 0
   ret i64 %0
 }
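
The new Neon_ScalarShiftImm_arm_D_size_patterns class matches a plain sra/srl/shl node on v1i64 whose shift amount is a duplicated immediate, so scalar shifts written without any AArch64-specific intrinsic can select the d-form instructions. Below is a minimal IR sketch of that shape; it is not part of this commit, the function names are illustrative, and whether this exact IR selects sshr/ushr depends on how v1i64 shifts are legalized.

; Illustrative only: v1i64 shifts by a splatted constant, written with generic
; IR shift instructions rather than an AArch64-specific intrinsic.
define i64 @sketch_sshr_d(i64 %a) {
entry:
  %v   = insertelement <1 x i64> undef, i64 %a, i32 0
  %shr = ashr <1 x i64> %v, <i64 63>   ; signed shift right, expected to map to sshr d, d, #63
  %r   = extractelement <1 x i64> %shr, i32 0
  ret i64 %r
}

define i64 @sketch_ushr_d(i64 %a) {
entry:
  %v   = insertelement <1 x i64> undef, i64 %a, i32 0
  %shr = lshr <1 x i64> %v, <i64 63>   ; unsigned shift right, expected to map to ushr d, d, #63
  %r   = extractelement <1 x i64> %shr, i32 0
  ret i64 %r
}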