Diffstat (limited to 'lib/Target/R600/AMDGPUTargetMachine.cpp')
-rw-r--r-- | lib/Target/R600/AMDGPUTargetMachine.cpp | 217
1 file changed, 139 insertions(+), 78 deletions(-)
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index b2cd988..a862f3c 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPUTargetMachine.h"
 #include "AMDGPU.h"
+#include "AMDGPUTargetTransformInfo.h"
 #include "R600ISelLowering.h"
 #include "R600InstrInfo.h"
 #include "R600MachineScheduler.h"
@@ -27,7 +28,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_os_ostream.h"
 #include "llvm/Transforms/IPO.h"
@@ -38,7 +39,8 @@ using namespace llvm;
 
 extern "C" void LLVMInitializeR600Target() {
   // Register the target
-  RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+  RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
+  RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
 }
 
 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
@@ -49,12 +51,28 @@ static MachineSchedRegistry
 SchedCustomRegistry("r600", "Run R600's custom scheduler",
                     createR600MachineScheduler);
 
+static std::string computeDataLayout(StringRef TT) {
+  Triple Triple(TT);
+  std::string Ret = "e-p:32:32";
+
+  if (Triple.getArch() == Triple::amdgcn) {
+    // 32-bit private, local, and region pointers. 64-bit global and constant.
+    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
+  }
+
+  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+         "-v512:512-v1024:1024-v2048:2048-n32:64";
+
+  return Ret;
+}
+
 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
                                          StringRef CPU, StringRef FS,
                                          TargetOptions Options, Reloc::Model RM,
                                          CodeModel::Model CM,
                                          CodeGenOpt::Level OptLevel)
     : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+      DL(computeDataLayout(TT)),
       TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this),
       IntrinsicInfo() {
   setRequiresStructuredCFG(true);
@@ -65,10 +83,33 @@ AMDGPUTargetMachine::~AMDGPUTargetMachine() {
   delete TLOF;
 }
 
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, StringRef TT, StringRef FS,
+                    StringRef CPU, TargetOptions Options, Reloc::Model RM,
+                    CodeModel::Model CM, CodeGenOpt::Level OL) :
+  AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+
+//===----------------------------------------------------------------------===//
+// GCN Target Machine (SI+)
+//===----------------------------------------------------------------------===//
+
+GCNTargetMachine::GCNTargetMachine(const Target &T, StringRef TT, StringRef FS,
+                    StringRef CPU, TargetOptions Options, Reloc::Model RM,
+                    CodeModel::Model CM, CodeGenOpt::Level OL) :
+  AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) { }
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
 namespace {
 class AMDGPUPassConfig : public TargetPassConfig {
 public:
-  AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
     : TargetPassConfig(TM, PM) {}
 
   AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
@@ -85,29 +126,38 @@ public:
   void addIRPasses() override;
   void addCodeGenPrepare() override;
+  virtual bool addPreISel() override;
+  virtual bool addInstSelector() override;
+};
+
+class R600PassConfig : public AMDGPUPassConfig {
+public:
+  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
+    : AMDGPUPassConfig(TM, PM) { }
+
   bool addPreISel() override;
-  bool addInstSelector() override;
-  bool addPreRegAlloc() override;
-  bool addPostRegAlloc() override;
-  bool addPreSched2() override;
-  bool addPreEmitPass() override;
+  void addPreRegAlloc() override;
+  void addPreSched2() override;
+  void addPreEmitPass() override;
 };
-} // End of anonymous namespace
 
-TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
-  return new AMDGPUPassConfig(this, PM);
-}
+class GCNPassConfig : public AMDGPUPassConfig {
+public:
+  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
+    : AMDGPUPassConfig(TM, PM) { }
+  bool addPreISel() override;
+  bool addInstSelector() override;
+  void addPreRegAlloc() override;
+  void addPostRegAlloc() override;
+  void addPreSched2() override;
+  void addPreEmitPass() override;
+};
 
-//===----------------------------------------------------------------------===//
-// AMDGPU Analysis Pass Setup
-//===----------------------------------------------------------------------===//
+} // End of anonymous namespace
 
-void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
-  // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
-  // allows the AMDGPU pass to delegate to the target independent layer when
-  // appropriate.
-  PM.add(createBasicTargetTransformInfoPass(this));
-  PM.add(createAMDGPUTargetTransformInfoPass(this));
+TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
+  return TargetIRAnalysis(
+      [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); });
 }
 
 void AMDGPUPassConfig::addIRPasses() {
@@ -129,7 +179,6 @@ void AMDGPUPassConfig::addCodeGenPrepare() {
     addPass(createAMDGPUPromoteAlloca(ST));
     addPass(createSROAPass());
   }
-
   TargetPassConfig::addCodeGenPrepare();
 }
 
@@ -139,84 +188,96 @@ AMDGPUPassConfig::addPreISel() {
   addPass(createFlattenCFGPass());
   if (ST.IsIRStructurizerEnabled())
     addPass(createStructurizeCFGPass());
-  if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    addPass(createSinkingPass());
-    addPass(createSITypeRewriter());
-    addPass(createSIAnnotateControlFlowPass());
-  } else {
-    addPass(createR600TextureIntrinsicsReplacer());
-  }
   return false;
 }
 
 bool AMDGPUPassConfig::addInstSelector() {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
   addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+  return false;
+}
 
-  if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
-    addPass(createSILowerI1CopiesPass());
-    addPass(createSIFixSGPRCopiesPass(*TM));
-  }
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
 
+bool R600PassConfig::addPreISel() {
+  AMDGPUPassConfig::addPreISel();
+  addPass(createR600TextureIntrinsicsReplacer());
   return false;
 }
 
-bool AMDGPUPassConfig::addPreRegAlloc() {
+void R600PassConfig::addPreRegAlloc() {
+  addPass(createR600VectorRegMerger(*TM));
+}
+
+void R600PassConfig::addPreSched2() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  addPass(createR600EmitClauseMarkers(), false);
+  if (ST.isIfCvtEnabled())
+    addPass(&IfConverterID, false);
+  addPass(createR600ClauseMergePass(*TM), false);
+}
 
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    addPass(createR600VectorRegMerger(*TM));
-  } else {
-    if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
-      // Don't do this with no optimizations since it throws away debug info by
-      // merging nonadjacent loads.
-
-      // This should be run after scheduling, but before register allocation. It
-      // also need extra copies to the address operand to be eliminated.
-      initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
-      insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
-    }
-
-    addPass(createSIShrinkInstructionsPass());
-    addPass(createSIFixSGPRLiveRangesPass());
-  }
-  return false;
+void R600PassConfig::addPreEmitPass() {
+  addPass(createAMDGPUCFGStructurizerPass(), false);
+  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
+  addPass(&FinalizeMachineBundlesID, false);
+  addPass(createR600Packetizer(*TM), false);
+  addPass(createR600ControlFlowFinalizer(*TM), false);
 }
 
-bool AMDGPUPassConfig::addPostRegAlloc() {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new R600PassConfig(this, PM);
 }
 
-  addPass(createSIShrinkInstructionsPass());
-  if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    addPass(createSIInsertWaits(*TM));
-  }
+//===----------------------------------------------------------------------===//
+// GCN Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool GCNPassConfig::addPreISel() {
+  AMDGPUPassConfig::addPreISel();
+  addPass(createSinkingPass());
+  addPass(createSITypeRewriter());
+  addPass(createSIAnnotateControlFlowPass());
   return false;
 }
 
-bool AMDGPUPassConfig::addPreSched2() {
-  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
-    addPass(createR600EmitClauseMarkers());
-  if (ST.isIfCvtEnabled())
-    addPass(&IfConverterID);
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
-    addPass(createR600ClauseMergePass(*TM));
+bool GCNPassConfig::addInstSelector() {
+  AMDGPUPassConfig::addInstSelector();
+  addPass(createSILowerI1CopiesPass());
+  addPass(createSIFixSGPRCopiesPass(*TM));
+  addPass(createSIFoldOperandsPass());
   return false;
 }
 
-bool AMDGPUPassConfig::addPreEmitPass() {
+void GCNPassConfig::addPreRegAlloc() {
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
-    addPass(createAMDGPUCFGStructurizerPass());
-    addPass(createR600ExpandSpecialInstrsPass(*TM));
-    addPass(&FinalizeMachineBundlesID);
-    addPass(createR600Packetizer(*TM));
-    addPass(createR600ControlFlowFinalizer(*TM));
-  } else {
-    addPass(createSILowerControlFlowPass(*TM));
+  if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
+    // Don't do this with no optimizations since it throws away debug info by
+    // merging nonadjacent loads.
+
+    // This should be run after scheduling, but before register allocation. It
+    // also need extra copies to the address operand to be eliminated.
+    initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
+    insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
   }
+  addPass(createSIShrinkInstructionsPass(), false);
+  addPass(createSIFixSGPRLiveRangesPass(), false);
 }
 
-  return false;
+void GCNPassConfig::addPostRegAlloc() {
+  addPass(createSIPrepareScratchRegs(), false);
+  addPass(createSIShrinkInstructionsPass(), false);
+}
+
+void GCNPassConfig::addPreSched2() {
+  addPass(createSIInsertWaits(*TM), false);
+}
+
+void GCNPassConfig::addPreEmitPass() {
+  addPass(createSILowerControlFlowPass(*TM), false);
+}
+
+TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new GCNPassConfig(this, PM);
 }
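
Note on the new computeDataLayout() helper added above: it can only produce two fixed strings, one for r600-family triples and one for amdgcn. The short program below is not part of the patch; it is a minimal standalone sketch that mirrors the string concatenation from that hunk, with a plain std::string argument standing in for the StringRef triple and the Triple::amdgcn check, so the two resulting layouts can be printed side by side.

// Standalone sketch (not from this diff): reproduces the string logic of
// computeDataLayout() so the r600 and amdgcn data layouts are visible.
#include <iostream>
#include <string>

static std::string computeDataLayoutSketch(const std::string &Arch) {
  std::string Ret = "e-p:32:32";
  if (Arch == "amdgcn") {
    // 32-bit private, local, and region pointers. 64-bit global and constant.
    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
  }
  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
         "-v512:512-v1024:1024-v2048:2048-n32:64";
  return Ret;
}

int main() {
  std::cout << "r600:   " << computeDataLayoutSketch("r600") << '\n';
  std::cout << "amdgcn: " << computeDataLayoutSketch("amdgcn") << '\n';
}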