diff options
author | Amara Emerson <amara.emerson@arm.com> | 2013-10-31 09:32:11 +0000 |
---|---|---|
committer | Amara Emerson <amara.emerson@arm.com> | 2013-10-31 09:32:11 +0000 |
commit | c2884320feebc543d2ce51151d5418dfc18da9e4 (patch) | |
tree | 0741cd0182a73f25f8f852c90a8b7751323dc4ef /lib | |
parent | 0f5e68e3dc64e7dde41577649ad44b4453859276 (diff) | |
download | external_llvm-c2884320feebc543d2ce51151d5418dfc18da9e4.zip external_llvm-c2884320feebc543d2ce51151d5418dfc18da9e4.tar.gz external_llvm-c2884320feebc543d2ce51151d5418dfc18da9e4.tar.bz2 |
[AArch64] Make the use of FP instructions optional, but enabled by default.
This adds a new subtarget feature called FPARMv8 (implied by NEON), and
predicates the support of the FP instructions and registers on this feature.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193739 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/AArch64/AArch64.td | 8 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 50 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 4 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrInfo.td | 21 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64Subtarget.cpp | 23 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64Subtarget.h | 12 |
6 files changed, 90 insertions, 28 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index e17052b..083064b 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -21,8 +21,11 @@ include "llvm/Target/Target.td" // AArch64 Subtarget features. // +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions">; + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable cryptographic instructions">; @@ -33,7 +36,8 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", include "AArch64Schedule.td" -def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; +def : Processor<"generic", GenericItineraries, + [FeatureFPARMv8, FeatureNEON, FeatureCrypto]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 87bb847..323acdd 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -50,10 +50,13 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) // Scalar register <-> type mapping addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + if (Subtarget->hasFPARMv8()) { + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + } if (Subtarget->hasNEON()) { // And the vectors @@ -961,24 +964,31 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, } } + if (getSubtarget()->hasFPARMv8()) { unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); int FPRIdx = 0; - if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], - &AArch64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); + // According to the AArch64 Procedure Call Standard, section B.1/B.3, we + // can omit a register save area if we know we'll never use registers of + // that class. + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } } + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); } int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); @@ -986,8 +996,6 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, FuncInfo->setVariadicStackIdx(StackIdx); FuncInfo->setVariadicGPRIdx(GPRIdx); FuncInfo->setVariadicGPRSize(GPRSaveSize); - FuncInfo->setVariadicFPRIdx(FPRIdx); - FuncInfo->setVariadicFPRSize(FPRSaveSize); if (!MemOps.empty()) { Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d69db31..7a41ce0 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -383,6 +383,8 @@ class A64I_extract<bit sf, bits<3> op, bit n, // Inherits Rd in 4-0 } +let Predicates = [HasFPARMv8] in { + // Format for floating-point compare instructions. class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2, dag outs, dag ins, string asmstr, @@ -562,6 +564,8 @@ class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5, // Inherit Rd in 4-0 } +} + // Format for load-register (literal) instructions. class A64I_LDRlit<bits<2> opc, bit v, dag outs, dag ins, string asmstr, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 43df2b4..ae217f9 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -14,6 +14,8 @@ //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "neon">; def HasCrypto : Predicate<"Subtarget->hasCrypto()">, @@ -2195,7 +2197,7 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; // Extra patterns for when we're allowed to optimise separate multiplication and // addition. -let Predicates = [UseFusedMAC] in { +let Predicates = [HasFPARMv8, UseFusedMAC] in { def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), @@ -2351,6 +2353,7 @@ defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; +let Predicates = [HasFPARMv8] in { def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>; def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>; def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>; @@ -2359,6 +2362,7 @@ def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>; def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>; def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>; def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>; +} multiclass A64I_inttofp<bit o0, string asmop> { def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; @@ -2370,6 +2374,7 @@ multiclass A64I_inttofp<bit o0, string asmop> { defm S : A64I_inttofp<0b0, "scvtf">; defm U : A64I_inttofp<0b1, "ucvtf">; +let Predicates = [HasFPARMv8] in { def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>; def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>; def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>; @@ -2378,16 +2383,19 @@ def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>; def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>; def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>; def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>; +} def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; +let Predicates = [HasFPARMv8] in { def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>; def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>; def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>; def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>; +} def lane1_asmoperand : AsmOperandClass { let Name = "Lane1"; @@ -2410,11 +2418,13 @@ let DecoderMethod = "DecodeFMOVLaneInstruction" in { "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>; } +let Predicates = [HasFPARMv8] in { def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; +} //===----------------------------------------------------------------------===// // Floating-point immediate instructions @@ -2508,11 +2518,15 @@ let mayLoad = 1 in { def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; } +let Predicates = [HasFPARMv8] in { def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; +} let mayLoad = 1 in { + let Predicates = [HasFPARMv8] in { def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; + } def LDRSWx_lit : A64I_LDRlit<0b10, 0b0, @@ -3106,6 +3120,7 @@ defm LS32 defm LS64 : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; +let Predicates = [HasFPARMv8] in { // STR/LDR to/from a B register defm LSFP8 : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; @@ -3124,6 +3139,7 @@ defm LSFP64 defm LSFP128 : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>; +} //===------------------------------ // 2.3 Signed loads @@ -3579,10 +3595,13 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg, defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; + +let Predicates = [HasFPARMv8] in { defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">; +} def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index d71bb4e..eece389 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -26,10 +26,27 @@ using namespace llvm; AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) - : AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false), - TargetTriple(TT) { + : AArch64GenSubtargetInfo(TT, CPU, FS), HasFPARMv8(false), HasNEON(false), + HasCrypto(false), TargetTriple(TT), CPUString(CPU) { - ParseSubtargetFeatures(CPU, FS); + initializeSubtargetFeatures(CPU, FS); +} + +void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU, + StringRef FS) { + if (CPU.empty()) + CPUString = "generic"; + + std::string FullFS = FS; + if (CPUString == "generic") { + // Enable FP by default. + if (FullFS.empty()) + FullFS = "+fp-armv8"; + else + FullFS = "+fp-armv8," + FullFS; + } + + ParseSubtargetFeatures(CPU, FullFS); } bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index f262b94..57eb187 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -28,11 +28,19 @@ class GlobalValue; class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: + bool HasFPARMv8; bool HasNEON; bool HasCrypto; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + + /// CPUString - String name of used CPU. + std::string CPUString; + +private: + void initializeSubtargetFeatures(StringRef CPU, StringRef FS); + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -52,9 +60,11 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } - bool hasCrypto() const { return HasCrypto; } + + const std::string & getCPUString() const { return CPUString; } }; } // End llvm namespace |