diff options
Diffstat (limited to 'lib/Target/R600/AMDGPUTargetMachine.cpp')
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 95 |
1 files changed, 50 insertions, 45 deletions
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 8aab944..b2cd988 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -22,6 +22,7 @@ #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Verifier.h" @@ -33,7 +34,6 @@ #include "llvm/Transforms/Scalar.h" #include <llvm/CodeGen/Passes.h> - using namespace llvm; extern "C" void LLVMInitializeR600Target() { @@ -49,46 +49,20 @@ static MachineSchedRegistry SchedCustomRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler); -static std::string computeDataLayout(const AMDGPUSubtarget &ST) { - std::string Ret = "e-p:32:32"; - - if (ST.is64bit()) { - // 32-bit private, local, and region pointers. 64-bit global and constant. - Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; - } - - Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" - "-v512:512-v1024:1024-v2048:2048-n32:64"; - - return Ret; -} - AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - TargetOptions Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OptLevel -) -: - LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), - Subtarget(TT, CPU, FS), - Layout(computeDataLayout(Subtarget)), - FrameLowering(TargetFrameLowering::StackGrowsUp, - 64 * 16 // Maximum stack alignment (long16) - , 0), - IntrinsicInfo(this), - InstrItins(&Subtarget.getInstrItineraryData()) { - // TLInfo uses InstrInfo so it must be initialized after. - if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - TLInfo.reset(new R600TargetLowering(*this)); - } else { - TLInfo.reset(new SITargetLowering(*this)); - } + StringRef CPU, StringRef FS, + TargetOptions Options, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OptLevel) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel), + TLOF(new TargetLoweringObjectFileELF()), + Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } AMDGPUTargetMachine::~AMDGPUTargetMachine() { + delete TLOF; } namespace { @@ -109,7 +83,8 @@ public: return nullptr; } - virtual void addCodeGenPrepare(); + void addIRPasses() override; + void addCodeGenPrepare() override; bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -135,10 +110,26 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addIRPasses() { + // Function calls are not supported, so make sure we inline everything. + addPass(createAMDGPUAlwaysInlinePass()); + addPass(createAlwaysInlinerPass()); + // We need to add the barrier noop pass, otherwise adding the function + // inlining pass will cause all of the PassConfigs passes to be run + // one function at a time, which means if we have a nodule with two + // functions, then we will generate code for the first function + // without ever running any passes on the second. + addPass(createBarrierNoopPass()); + TargetPassConfig::addIRPasses(); +} + void AMDGPUPassConfig::addCodeGenPrepare() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - addPass(createAMDGPUPromoteAlloca(ST)); - addPass(createSROAPass()); + if (ST.isPromoteAllocaEnabled()) { + addPass(createAMDGPUPromoteAlloca(ST)); + addPass(createSROAPass()); + } + TargetPassConfig::addCodeGenPrepare(); } @@ -159,8 +150,15 @@ AMDGPUPassConfig::addPreISel() { } bool AMDGPUPassConfig::addInstSelector() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); - addPass(createSILowerI1CopiesPass()); + + if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + addPass(createSILowerI1CopiesPass()); + addPass(createSIFixSGPRCopiesPass(*TM)); + } + return false; } @@ -170,12 +168,18 @@ bool AMDGPUPassConfig::addPreRegAlloc() { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createR600VectorRegMerger(*TM)); } else { - addPass(createSIFixSGPRCopiesPass(*TM)); - // SIFixSGPRCopies can generate a lot of duplicate instructions, - // so we need to run MachineCSE afterwards. - addPass(&MachineCSEID); - initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry()); - insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID); + if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { + // Don't do this with no optimizations since it throws away debug info by + // merging nonadjacent loads. + + // This should be run after scheduling, but before register allocation. It + // also need extra copies to the address operand to be eliminated. + initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); + insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); + } + + addPass(createSIShrinkInstructionsPass()); + addPass(createSIFixSGPRLiveRangesPass()); } return false; } @@ -183,6 +187,7 @@ bool AMDGPUPassConfig::addPreRegAlloc() { bool AMDGPUPassConfig::addPostRegAlloc() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + addPass(createSIShrinkInstructionsPass()); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createSIInsertWaits(*TM)); } |