diff options
-rw-r--r-- | include/llvm/IR/IntrinsicsAArch64.td | 3 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 44 | ||||
-rw-r--r-- | test/CodeGen/AArch64/neon-scalar-recip.ll | 69 | ||||
-rw-r--r-- | test/MC/AArch64/neon-diagnostics.s | 41 | ||||
-rw-r--r-- | test/MC/AArch64/neon-scalar-recip.s | 30 | ||||
-rw-r--r-- | test/MC/Disassembler/AArch64/neon-instructions.txt | 24 |
6 files changed, 201 insertions, 10 deletions
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td index 52810be..8fcef7f 100644 --- a/include/llvm/IR/IntrinsicsAArch64.td +++ b/include/llvm/IR/IntrinsicsAArch64.td @@ -164,4 +164,7 @@ def int_aarch64_neon_vcvtf32_u32 : Intrinsic<[llvm_v1f32_ty], [llvm_v1i32_ty], [IntrNoMem]>; def int_aarch64_neon_vcvtf64_u64 : Intrinsic<[llvm_v1f64_ty], [llvm_v1i64_ty], [IntrNoMem]>; + +// Scalar Floating-point Reciprocal Exponent +def int_aarch64_neon_vrecpx : Neon_1Arg_Intrinsic; } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index b627171..a9f6061 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -3106,16 +3106,25 @@ multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode, [], NoItinerary>; } -multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator Sopnode, - SDPatternOperator Dopnode, - Instruction INSTS, - Instruction INSTD> { +multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator Sopnode, + SDPatternOperator Dopnode, + Instruction INSTS, + Instruction INSTD> { def : Pat<(v1f32 (Sopnode (v1i32 FPR32:$Rn))), (INSTS FPR32:$Rn)>; def : Pat<(v1f64 (Dopnode (v1i64 FPR64:$Rn))), (INSTD FPR64:$Rn)>; } +multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; +} + // Scalar Integer Add let isCommutable = 1 in { def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; @@ -3258,15 +3267,30 @@ defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>; // Scalar Signed Integer Convert To Floating-point defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">; -defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_s32, - int_aarch64_neon_vcvtf64_s64, - SCVTFss, SCVTFdd>; +defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_s32, + int_aarch64_neon_vcvtf64_s64, + SCVTFss, SCVTFdd>; // Scalar Unsigned Integer Convert To Floating-point defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">; -defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vcvtf32_u32, - int_aarch64_neon_vcvtf64_u64, - UCVTFss, UCVTFdd>; +defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtf32_u32, + int_aarch64_neon_vcvtf64_u64, + UCVTFss, UCVTFdd>; + +// Scalar Floating-point Reciprocal Estimate +defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">; +defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrecpe, + FRECPEss, FRECPEdd>; + +// Scalar Floating-point Reciprocal Exponent +defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">; +defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx, + FRECPXss, FRECPXdd>; + +// Scalar Floating-point Reciprocal Square Root Estimate +defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">; +defm : Neon_Scalar2SameMisc_SD_size_patterns<int_arm_neon_vrsqrte, + FRSQRTEss, FRSQRTEdd>; // Scalar Reduce Pairwise diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll index 91ee12c..f21c27b 100644 --- a/test/CodeGen/AArch64/neon-scalar-recip.ll +++ b/test/CodeGen/AArch64/neon-scalar-recip.ll @@ -45,3 +45,72 @@ define double @test_vrsqrtsd_f64(double %a, double %b) { declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>) declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>) + +define float @test_vrecpes_f32(float %a) { +; CHECK: test_vrecpes_f32 +; CHECK: frecpe {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrecpe.i = insertelement <1 x float> undef, float %a, i32 0 + %vrecpe1.i = tail call <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float> %vrecpe.i) + %0 = extractelement <1 x float> %vrecpe1.i, i32 0 + ret float %0 +} + +define double @test_vrecped_f64(double %a) { +; CHECK: test_vrecped_f64 +; CHECK: frecpe {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrecpe.i = insertelement <1 x double> undef, double %a, i32 0 + %vrecpe1.i = tail call <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double> %vrecpe.i) + %0 = extractelement <1 x double> %vrecpe1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.arm.neon.vrecpe.v1f32(<1 x float>) +declare <1 x double> @llvm.arm.neon.vrecpe.v1f64(<1 x double>) + +define float @test_vrecpxs_f32(float %a) { +; CHECK: test_vrecpxs_f32 +; CHECK: frecpx {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrecpx.i = insertelement <1 x float> undef, float %a, i32 0 + %vrecpx1.i = tail call <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float> %vrecpx.i) + %0 = extractelement <1 x float> %vrecpx1.i, i32 0 + ret float %0 +} + +define double @test_vrecpxd_f64(double %a) { +; CHECK: test_vrecpxd_f64 +; CHECK: frecpx {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrecpx.i = insertelement <1 x double> undef, double %a, i32 0 + %vrecpx1.i = tail call <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double> %vrecpx.i) + %0 = extractelement <1 x double> %vrecpx1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.aarch64.neon.vrecpx.v1f32(<1 x float>) +declare <1 x double> @llvm.aarch64.neon.vrecpx.v1f64(<1 x double>) + +define float @test_vrsqrtes_f32(float %a) { +; CHECK: test_vrsqrtes_f32 +; CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} +entry: + %vrsqrte.i = insertelement <1 x float> undef, float %a, i32 0 + %vrsqrte1.i = tail call <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float> %vrsqrte.i) + %0 = extractelement <1 x float> %vrsqrte1.i, i32 0 + ret float %0 +} + +define double @test_vrsqrted_f64(double %a) { +; CHECK: test_vrsqrted_f64 +; CHECK: frsqrte {{d[0-9]+}}, {{d[0-9]+}} +entry: + %vrsqrte.i = insertelement <1 x double> undef, double %a, i32 0 + %vrsqrte1.i = tail call <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double> %vrsqrte.i) + %0 = extractelement <1 x double> %vrsqrte1.i, i32 0 + ret double %0 +} + +declare <1 x float> @llvm.arm.neon.vrsqrte.v1f32(<1 x float>) +declare <1 x double> @llvm.arm.neon.vrsqrte.v1f64(<1 x double>) diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index be5a871..9127ed8 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -213,6 +213,47 @@ // CHECK-ERROR: movi v1.16b, #256 // CHECK-ERROR: ^ +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Estimate +//---------------------------------------------------------------------- + + frecpe s19, h14 + frecpe d13, s13 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe s19, h14 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpe d13, s13 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Exponent +//---------------------------------------------------------------------- + + frecpx s18, h10 + frecpx d16, s19 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpx s18, h10 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frecpx d16, s19 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Square Root Estimate +//---------------------------------------------------------------------- + + frsqrte s22, h13 + frsqrte d21, s12 + +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte s22, h13 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: frsqrte d21, s12 +// CHECK-ERROR: ^ //---------------------------------------------------------------------- // Vector Move Immediate - bytemask, per doubleword diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s index bb9c170..7a886f3 100644 --- a/test/MC/AArch64/neon-scalar-recip.s +++ b/test/MC/AArch64/neon-scalar-recip.s @@ -21,3 +21,33 @@ // CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e] // CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Estimate +//---------------------------------------------------------------------- + + frecpe s19, s14 + frecpe d13, d13 + +// CHECK: frecpe s19, s14 // encoding: [0xd3,0xd9,0xa1,0x5e] +// CHECK: frecpe d13, d13 // encoding: [0xad,0xd9,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Exponent +//---------------------------------------------------------------------- + + frecpx s18, s10 + frecpx d16, d19 + +// CHECK: frecpx s18, s10 // encoding: [0x52,0xf9,0xa1,0x5e] +// CHECK: frecpx d16, d19 // encoding: [0x70,0xfa,0xe1,0x5e] + +//---------------------------------------------------------------------- +// Scalar Floating-point Reciprocal Square Root Estimate +//---------------------------------------------------------------------- + + frsqrte s22, s13 + frsqrte d21, d12 + +// CHECK: frsqrte s22, s13 // encoding: [0xb6,0xd9,0xa1,0x7e] +// CHECK: frsqrte d21, d12 // encoding: [0x95,0xd9,0xe1,0x7e] diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt index 7d7d795..b05d3cb 100644 --- a/test/MC/Disassembler/AArch64/neon-instructions.txt +++ b/test/MC/Disassembler/AArch64/neon-instructions.txt @@ -1508,3 +1508,27 @@ # CHECK: ucvtf d21, d14 0xb6,0xd9,0x21,0x7e 0xd5,0xd9,0x61,0x7e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Estimate +#---------------------------------------------------------------------- +# CHECK: frecpe s19, s14 +# CHECK: frecpe d13, d13 +0xd3,0xd9,0xa1,0x5e +0xad,0xd9,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Exponent +#---------------------------------------------------------------------- +# CHECK: frecpx s18, s10 +# CHECK: frecpx d16, d19 +0x52,0xf9,0xa1,0x5e +0x70,0xfa,0xe1,0x5e + +#---------------------------------------------------------------------- +# Scalar Floating-point Reciprocal Square Root Estimate +#---------------------------------------------------------------------- +# CHECK: frsqrte s22, s13 +# CHECK: frsqrte d21, d12 +0xb6,0xd9,0xa1,0x7e +0x95,0xd9,0xe1,0x7e |