diff options
Diffstat (limited to 'lib/Target/R600/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 58 |
1 files changed, 46 insertions, 12 deletions
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index ea78f43..e7bc006 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -52,7 +52,7 @@ public: AMDGPUTTI(const AMDGPUTargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { + TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); } @@ -74,10 +74,14 @@ public: bool hasBranchDivergence() const override; - void getUnrollingPreferences(Loop *L, + void getUnrollingPreferences(const Function *F, Loop *L, UnrollingPreferences &UP) const override; - /// @} + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const override; + + unsigned getNumberOfRegisters(bool Vector) const override; + unsigned getRegisterBitWidth(bool Vector) const override; + unsigned getMaxInterleaveFactor() const override; }; } // end anonymous namespace @@ -93,16 +97,20 @@ llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) { bool AMDGPUTTI::hasBranchDivergence() const { return true; } -void AMDGPUTTI::getUnrollingPreferences(Loop *L, +void AMDGPUTTI::getUnrollingPreferences(const Function *, Loop *L, UnrollingPreferences &UP) const { - for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - BasicBlock *BB = *BI; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I); - if (!GEP) + UP.Threshold = 300; // Twice the default. + UP.Count = UINT_MAX; + UP.Partial = true; + + // TODO: Do we want runtime unrolling? + + for (const BasicBlock *BB : L->getBlocks()) { + for (const Instruction &I : *BB) { + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I); + if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) continue; + const Value *Ptr = GEP->getPointerOperand(); const AllocaInst *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr)); if (Alloca) { @@ -116,8 +124,34 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L, // // Don't use the maximum allowed value here as it will make some // programs way too big. - UP.Threshold = 500; + UP.Threshold = 800; } } } } + +AMDGPUTTI::PopcntSupportKind +AMDGPUTTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + return ST->hasBCNT(TyWidth) ? PSK_FastHardware : PSK_Software; +} + +unsigned AMDGPUTTI::getNumberOfRegisters(bool Vec) const { + if (Vec) + return 0; + + // Number of VGPRs on SI. + if (ST->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) + return 256; + + return 4 * 128; // XXX - 4 channels. Should these count as vector instead? +} + +unsigned AMDGPUTTI::getRegisterBitWidth(bool) const { + return 32; +} + +unsigned AMDGPUTTI::getMaxInterleaveFactor() const { + // Semi-arbitrary large amount. + return 64; +} |