diff options
author | Chad Rosier <mcrosier@codeaurora.org> | 2013-10-30 15:19:37 +0000 |
---|---|---|
committer | Chad Rosier <mcrosier@codeaurora.org> | 2013-10-30 15:19:37 +0000 |
commit | f853a034a1fdccd194da04ca1e2e1aa8bcbd16b4 (patch) | |
tree | a6f39c0d311ccd6e32495be46d5fa1ff0c544948 /lib/Target | |
parent | 95efb037f7ddc4cac67007eb3a9864e6012eda3b (diff) | |
download | external_llvm-f853a034a1fdccd194da04ca1e2e1aa8bcbd16b4.zip external_llvm-f853a034a1fdccd194da04ca1e2e1aa8bcbd16b4.tar.gz external_llvm-f853a034a1fdccd194da04ca1e2e1aa8bcbd16b4.tar.bz2 |
[AArch64] Add support for NEON scalar floating-point compare instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193691 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 5 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.td | 7 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 95 |
3 files changed, 102 insertions, 5 deletions
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 77aadee..87bb847 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3492,12 +3492,15 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, unsigned SplatBitSize; bool HasAnyUndefs; + unsigned UseNeonMov = VT.getSizeInBits() >= 64; + // Note we favor lowering MOVI over MVNI. // This has implications on the definition of patterns in TableGen to select // BIC immediate instructions but not ORR immediate instructions. // If this lowering order is changed, TableGen patterns for BIC immediate and // ORR immediate instructions have to be updated. - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (UseNeonMov && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { // First attempt to use vector immediate-form MOVI EVT NeonMovVT; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 2332799..43df2b4 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -1980,6 +1980,13 @@ def fpz64 : Operand<f64>, let DecoderMethod = "DecodeFPZeroOperand"; } +def fpz64movi : Operand<i64>, + ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; +} + multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> { def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, (outs), ins, "fcmp\t$Rn, $Rm", [pattern], diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 10dde19..b9f83f7 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -3210,8 +3210,8 @@ multiclass 
Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode, class Neon_Scalar3Same_cmp_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INSTD VPR64:$Rn, VPR64:$Rm)>; + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode, Instruction INSTH, @@ -3231,6 +3231,15 @@ multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode, (INSTD FPR64:$Rn, FPR64:$Rm)>; } +multiclass Neon_Scalar3Same_cmp_SD_size_patterns<SDPatternOperator opnode, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + // Scalar Three Different multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> { @@ -3381,10 +3390,36 @@ class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop> [], NoItinerary>; +multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode, + string asmop> { + def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode, + (outs FPR32:$Rd), (ins FPR32:$Rn, fpz32:$FPImm), + !strconcat(asmop, " $Rd, $Rn, $FPImm"), + [], + NoItinerary>; + def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode, + (outs FPR64:$Rd), (ins FPR64:$Rn, fpz64movi:$FPImm), + !strconcat(asmop, " $Rd, $Rn, $FPImm"), + [], + NoItinerary>; +} + class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> - : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))), - (INSTD VPR64:$Rn, 0)>; + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), + (v1i64 (bitconvert (v8i8 Neon_immAllZeros))))), + (INSTD FPR64:$Rn, 0)>; + +multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode, + Instruction INSTS, + Instruction INSTD> { + def : 
Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), + (v1f32 (scalar_to_vector (f32 fpimm:$FPImm))))), + (INSTS FPR32:$Rn, fpimm:$FPImm)>; + def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), + (v1f64 (bitconvert (v8i8 Neon_immAllZeros))))), + (INSTD FPR64:$Rn, 0)>; +} multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> { @@ -3669,6 +3704,58 @@ def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">; def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz, CMLTddi>; +// Scalar Floating-point Compare + +// Scalar Floating-point Compare Mask Equal +defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">; +defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vceq, + FCMEQsss, FCMEQddd>; + +// Scalar Floating-point Compare Mask Equal To Zero +defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">; +defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vceq, + FCMEQZssi, FCMEQZddi>; + +// Scalar Floating-point Compare Mask Greater Than Or Equal +defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">; +defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcge, + FCMGEsss, FCMGEddd>; + +// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero +defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">; +defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcge, + FCMGEZssi, FCMGEZddi>; + +// Scalar Floating-point Compare Mask Greater Than +defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">; +defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcgt, + FCMGTsss, FCMGTddd>; + +// Scalar Floating-point Compare Mask Greater Than Zero +defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">; +defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcgt, + FCMGTZssi, FCMGTZddi>; + +// Scalar Floating-point Compare Mask Less Than Or Equal To Zero +defm FCMLEZ: 
NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">; +defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vclez, + FCMLEZssi, FCMLEZddi>; + +// Scalar Floating-point Compare Mask Less Than Zero +defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">; +defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_vcltz, + FCMLTZssi, FCMLTZddi>; + +// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal +defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">; +defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcage, + FACGEsss, FACGEddd>; + +// Scalar Floating-point Absolute Compare Mask Greater Than +defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">; +defm : Neon_Scalar3Same_cmp_SD_size_patterns<int_aarch64_neon_vcagt, + FACGTsss, FACGTddd>; + // Scalar Absolute Value defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">; defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>; |