 lib/Target/AArch64/AArch64ISelLowering.cpp         |   3
 lib/Target/AArch64/AArch64InstrNEON.td             | 182
 test/CodeGen/AArch64/neon-scalar-mul.ll            |  70
 test/CodeGen/AArch64/neon-scalar-recip.ll          |  47
 test/MC/AArch64/neon-diagnostics.s                 |  68
 test/MC/AArch64/neon-scalar-mul.s                  |  33
 test/MC/AArch64/neon-scalar-recip.s                |  23
 test/MC/Disassembler/AArch64/neon-instructions.txt |  40
 8 files changed, 422 insertions(+), 44 deletions(-)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index b19731c..d70548a 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -318,9 +318,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
     setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
     setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
+    setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
     setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+    setOperationAction(ISD::SETCC, MVT::v1f32, Custom);
     setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
     setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+    setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
     setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
   }
 }
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index c780f3a..e4c946b 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -2991,6 +2991,40 @@ class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
                       [],
                       NoItinerary>;

+multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
+                                      string asmop, bit Commutable = 0>
+{
+  let isCommutable = Commutable in {
+    def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
+                                (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+    def sss : NeonI_Scalar3Same<u, 0b10, opcode,
+                                (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+  }
+}
+
+multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
+                                      string asmop, bit Commutable = 0>
+{
+  let isCommutable = Commutable in {
+    def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
+                                (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+    def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
+                                (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+  }
+}
+
 multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
                                         string asmop, bit Commutable = 0>
 {
@@ -3018,16 +3052,18 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
   }
 }

-multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
-                                       Instruction INSTD> {
+multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
+                                            Instruction INSTD> {
   def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
             (INSTD FPR64:$Rn, FPR64:$Rm)>;
 }

-multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
-                                          Instruction INSTB, Instruction INSTH,
-                                          Instruction INSTS, Instruction INSTD>
-  : Neon_Scalar_D_size_patterns<opnode, INSTD> {
+multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
+                                               Instruction INSTB,
+                                               Instruction INSTH,
+                                               Instruction INSTS,
+                                               Instruction INSTD>
+  : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
   def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
            (INSTB FPR8:$Rn, FPR8:$Rm)>;
@@ -3038,6 +3074,24 @@ multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
            (INSTS FPR32:$Rn, FPR32:$Rm)>;
 }

+multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
+                                             Instruction INSTH,
+                                             Instruction INSTS> {
+  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+            (INSTH FPR16:$Rn, FPR16:$Rm)>;
+  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+            (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}
+
+multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
+                                             Instruction INSTS,
+                                             Instruction INSTD> {
+  def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
+            (INSTS FPR32:$Rn, FPR32:$Rm)>;
+  def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+            (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
 // Scalar Integer Add
 let isCommutable = 1 in {
 def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
@@ -3047,14 +3101,14 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
 // Scalar Integer Sub
 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;

 // Pattern for Scalar Integer Add and Sub with D register only
-defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
-defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;

 // Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;

 // Scalar Integer Saturating Add (Signed, Unsigned)
 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
@@ -3066,21 +3120,57 @@ defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;

 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;

 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
-                                      SQADDsss, SQADDddd>;
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
-                                      UQADDsss, UQADDddd>;
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
-                                      SQSUBsss, SQSUBddd>;
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
-                                      UQSUBsss, UQSUBddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
+                                           SQADDhhh, SQADDsss, SQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
+                                           UQADDhhh, UQADDsss, UQADDddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
+                                           SQSUBhhh, SQSUBsss, SQSUBddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
+                                           UQSUBhhh, UQSUBsss, UQSUBddd>;
+
+// Scalar Integer Saturating Doubling Multiply Half High
+defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
+
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Doubling Multiply Half High and
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
+                                         SQDMULHsss>;
+defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
+                                         SQRDMULHsss>;
+
+// Scalar Floating-point Multiply Extended
+defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+
+// Scalar Floating-point Reciprocal Step
+defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
+
+// Scalar Floating-point Reciprocal Square Root Step
+defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Floating-point Reciprocal Step and
+// Scalar Floating-point Reciprocal Square Root Step
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
+                                         FRECPSddd>;
+defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
+                                         FRSQRTSddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Floating-point Multiply Extended,
+defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
+                                         FMULXddd>;

 // Scalar Integer Shift Left (Signed, Unsigned)
 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
@@ -3088,13 +3178,13 @@ def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;

 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;

 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;

 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
@@ -3102,15 +3192,15 @@ defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;

 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Saturating Shift Letf (Signed, Unsigned)
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
-                                      SQSHLsss, SQSHLddd>;
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
-                                      UQSHLsss, UQSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
+                                           SQSHLhhh, SQSHLsss, SQSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
+                                           UQSHLhhh, UQSHLsss, UQSHLddd>;

 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Saturating Shift Letf (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;

 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
@@ -3118,13 +3208,13 @@ def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;

 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;

 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;

 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
@@ -3132,15 +3222,15 @@ defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;

 // Patterns to match llvm.aarch64.* intrinsic for
 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
-                                      SQRSHLsss, SQRSHLddd>;
-defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
-                                      UQRSHLsss, UQRSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
+                                           SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
+defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
+                                           UQRSHLhhh, UQRSHLsss, UQRSHLddd>;

 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;

 // Scalar Reduce Pairwise
@@ -4507,3 +4597,7 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
 def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
           (FMOVdx $src)>;

+def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
+          (v1f32 FPR32:$Rn)>;
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
+          (v1f64 FPR64:$Rn)>;
\ No newline at end of file
diff --git a/test/CodeGen/AArch64/neon-scalar-mul.ll b/test/CodeGen/AArch64/neon-scalar-mul.ll
new file mode 100644
index 0000000..6914a08
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-mul.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
+; CHECK: test_vqdmulhh_s16
+; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  %1 = insertelement <1 x i16> undef, i16 %a, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
+  %3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
+  %4 = extractelement <1 x i16> %3, i32 0
+  ret i16 %4
+}
+
+define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) {
+; CHECK: test_vqdmulhs_s32
+; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x i32> undef, i32 %a, i32 0
+  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
+  %3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
+  %4 = extractelement <1 x i32> %3, i32 0
+  ret i32 %4
+}
+
+declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>)
+
+define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) {
+; CHECK: test_vqrdmulhh_s16
+; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
+  %1 = insertelement <1 x i16> undef, i16 %a, i32 0
+  %2 = insertelement <1 x i16> undef, i16 %b, i32 0
+  %3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
+  %4 = extractelement <1 x i16> %3, i32 0
+  ret i16 %4
+}
+
+define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) {
+; CHECK: test_vqrdmulhs_s32
+; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x i32> undef, i32 %a, i32 0
+  %2 = insertelement <1 x i32> undef, i32 %b, i32 0
+  %3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
+  %4 = extractelement <1 x i32> %3, i32 0
+  ret i32 %4
+}
+
+declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>)
+declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)
+
+define float @test_vmulxs_f32(float %a, float %b) {
+; CHECK: test_vmulxs_f32
+; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x float> undef, float %a, i32 0
+  %2 = insertelement <1 x float> undef, float %b, i32 0
+  %3 = call <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float> %1, <1 x float> %2)
+  %4 = extractelement <1 x float> %3, i32 0
+  ret float %4
+}
+
+define double @test_vmulxd_f64(double %a, double %b) {
+; CHECK: test_vmulxd_f64
+; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = insertelement <1 x double> undef, double %a, i32 0
+  %2 = insertelement <1 x double> undef, double %b, i32 0
+  %3 = call <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double> %1, <1 x double> %2)
+  %4 = extractelement <1 x double> %3, i32 0
+  ret double %4
+}
+
+declare <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float>, <1 x float>)
+declare <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double>, <1 x double>)
diff --git a/test/CodeGen/AArch64/neon-scalar-recip.ll b/test/CodeGen/AArch64/neon-scalar-recip.ll
new file mode 100644
index 0000000..91ee12c
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-scalar-recip.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+define float @test_vrecpss_f32(float %a, float %b) {
+; CHECK: test_vrecpss_f32
+; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x float> undef, float %a, i32 0
+  %2 = insertelement <1 x float> undef, float %b, i32 0
+  %3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2)
+  %4 = extractelement <1 x float> %3, i32 0
+  ret float %4
+}
+
+define double @test_vrecpsd_f64(double %a, double %b) {
+; CHECK: test_vrecpsd_f64
+; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = insertelement <1 x double> undef, double %a, i32 0
+  %2 = insertelement <1 x double> undef, double %b, i32 0
+  %3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2)
+  %4 = extractelement <1 x double> %3, i32 0
+  ret double %4
+}
+
+declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>)
+declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
+
+define float @test_vrsqrtss_f32(float %a, float %b) {
+; CHECK: test_vrsqrtss_f32
+; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  %1 = insertelement <1 x float> undef, float %a, i32 0
+  %2 = insertelement <1 x float> undef, float %b, i32 0
+  %3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2)
+  %4 = extractelement <1 x float> %3, i32 0
+  ret float %4
+}
+
+define double @test_vrsqrtsd_f64(double %a, double %b) {
+; CHECK: test_vrsqrtsd_f64
+; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+  %1 = insertelement <1 x double> undef, double %a, i32 0
+  %2 = insertelement <1 x double> undef, double %b, i32 0
+  %3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2)
+  %4 = extractelement <1 x double> %3, i32 0
+  ret double %4
+}
+
+declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
+declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s
index a86796f..be5a871 100644
--- a/test/MC/AArch64/neon-diagnostics.s
+++ b/test/MC/AArch64/neon-diagnostics.s
@@ -826,6 +826,33 @@
 // CHECK-ERROR:        uqsub h1, h2, d2
 // CHECK-ERROR:        ^

+//----------------------------------------------------------------------
+// Scalar Integer Saturating Doubling Multiply Half High (Signed)
+//----------------------------------------------------------------------
+
+        sqdmulh h10, s11, h12
+        sqdmulh s20, h21, s2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh h10, s11, h12
+// CHECK-ERROR:        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqdmulh s20, h21, s2
+// CHECK-ERROR:        ^
+
+//------------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Doubling Multiply Half High (Signed)
+//------------------------------------------------------------------------
+
+        sqrdmulh h10, s11, h12
+        sqrdmulh s20, h21, s2
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh h10, s11, h12
+// CHECK-ERROR:        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        sqrdmulh s20, h21, s2
+// CHECK-ERROR:        ^

 //----------------------------------------------------------------------
 // Vector Shift Left (Signed and Unsigned Integer)
@@ -3771,3 +3798,44 @@
 // CHECK-ERROR:        fminv d0, v1.2d
 // CHECK-ERROR:        ^

+//----------------------------------------------------------------------
+// Floating-point Multiply Extended
+//----------------------------------------------------------------------
+
+        fmulx s20, h22, s15
+        fmulx d23, d11, s1
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmulx s20, h22, s15
+// CHECK-ERROR:        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        fmulx d23, d11, s1
+// CHECK-ERROR:        ^
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Step
+//----------------------------------------------------------------------
+
+        frecps s21, s16, h13
+        frecps d22, s30, d21
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecps s21, s16, h13
+// CHECK-ERROR:        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frecps d22, s30, d21
+// CHECK-ERROR:        ^
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Square Root Step
+//----------------------------------------------------------------------
+
+        frsqrts s21, h5, s12
+        frsqrts d8, s22, d18
+
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrts s21, h5, s12
+// CHECK-ERROR:        ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:        frsqrts d8, s22, d18
+// CHECK-ERROR:        ^
diff --git a/test/MC/AArch64/neon-scalar-mul.s b/test/MC/AArch64/neon-scalar-mul.s
new file mode 100644
index 0000000..8caddb4
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-mul.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Doubling Multiply Half High
+//----------------------------------------------------------------------
+
+    sqdmulh h10, h11, h12
+    sqdmulh s20, s21, s2
+
+// CHECK: sqdmulh h10, h11, h12  // encoding: [0x6a,0xb5,0x6c,0x5e]
+// CHECK: sqdmulh s20, s21, s2   // encoding: [0xb4,0xb6,0xa2,0x5e]
+
+//----------------------------------------------------------------------
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+//----------------------------------------------------------------------
+
+    sqrdmulh h10, h11, h12
+    sqrdmulh s20, s21, s2
+
+// CHECK: sqrdmulh h10, h11, h12  // encoding: [0x6a,0xb5,0x6c,0x7e]
+// CHECK: sqrdmulh s20, s21, s2   // encoding: [0xb4,0xb6,0xa2,0x7e]
+
+//----------------------------------------------------------------------
+// Floating-point Multiply Extended
+//----------------------------------------------------------------------
+
+    fmulx s20, s22, s15
+    fmulx d23, d11, d1
+
+// CHECK: fmulx s20, s22, s15  // encoding: [0xd4,0xde,0x2f,0x5e]
+// CHECK: fmulx d23, d11, d1   // encoding: [0x77,0xdd,0x61,0x5e]
diff --git a/test/MC/AArch64/neon-scalar-recip.s b/test/MC/AArch64/neon-scalar-recip.s
new file mode 100644
index 0000000..bb9c170
--- /dev/null
+++ b/test/MC/AArch64/neon-scalar-recip.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
+
+// Check that the assembler can handle the documented syntax for AArch64
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Step
+//----------------------------------------------------------------------
+
+    frecps s21, s16, s13
+    frecps d22, d30, d21
+
+// CHECK: frecps s21, s16, s13  // encoding: [0x15,0xfe,0x2d,0x5e]
+// CHECK: frecps d22, d30, d21  // encoding: [0xd6,0xff,0x75,0x5e]
+
+//----------------------------------------------------------------------
+// Floating-point Reciprocal Square Root Step
+//----------------------------------------------------------------------
+
+    frsqrts s21, s5, s12
+    frsqrts d8, d22, d18
+
+// CHECK: frsqrts s21, s5, s12  // encoding: [0xb5,0xfc,0xac,0x5e]
+// CHECK: frsqrts d8, d22, d18  // encoding: [0xc8,0xfe,0xf2,0x5e]
diff --git a/test/MC/Disassembler/AArch64/neon-instructions.txt b/test/MC/Disassembler/AArch64/neon-instructions.txt
index ecb6249..5057ecd 100644
--- a/test/MC/Disassembler/AArch64/neon-instructions.txt
+++ b/test/MC/Disassembler/AArch64/neon-instructions.txt
@@ -1452,3 +1452,43 @@
 0x20 0x60 0x22 0x6e
 0x20 0x60 0x62 0x6e
 0x20 0x60 0xa2 0x6e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Doubling Multiply Half High
+#----------------------------------------------------------------------
+# CHECK: sqdmulh h10, h11, h12
+# CHECK: sqdmulh s20, s21, s2
+0x6a,0xb5,0x6c,0x5e
+0xb4,0xb6,0xa2,0x5e
+
+#----------------------------------------------------------------------
+# Scalar Integer Saturating Rounding Doubling Multiply Half High
+#----------------------------------------------------------------------
+# CHECK: sqrdmulh h10, h11, h12
+# CHECK: sqrdmulh s20, s21, s2
+0x6a,0xb5,0x6c,0x7e
+0xb4,0xb6,0xa2,0x7e
+
+#----------------------------------------------------------------------
+# Floating-point multiply extended
+#----------------------------------------------------------------------
+# CHECK: fmulx s20, s22, s15
+# CHECK: fmulx d23, d11, d1
+0xd4,0xde,0x2f,0x5e
+0x77,0xdd,0x61,0x5e
+
+#----------------------------------------------------------------------
+# Floating-point Reciprocal Step
+#----------------------------------------------------------------------
+# CHECK: frecps s21, s16, s13
+# CHECK: frecps d22, d30, d21
+0x15,0xfe,0x2d,0x5e
+0xd6,0xff,0x75,0x5e
+
+#----------------------------------------------------------------------
+# Floating-point Reciprocal Square Root Step
+#----------------------------------------------------------------------
+# CHECK: frsqrts s21, s5, s12
+# CHECK: frsqrts d8, d22, d18
+0xb5,0xfc,0xac,0x5e
+0xc8,0xfe,0xf2,0x5e