| author | Hongbin Zheng <etherzhhb@gmail.com> | 2012-04-05 15:46:55 +0000 |
|---|---|---|
| committer | Hongbin Zheng <etherzhhb@gmail.com> | 2012-04-05 15:46:55 +0000 |
| commit | bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1 (patch) | |
| tree | a730cfaadaf3916d0e0d3ee6ffa45489887e71de /lib/Transforms | |
| parent | 17dcaf5ef9e761bd3b516a3f4cb85a9fdcb5975e (diff) | |
Introduce the VectorizeConfig class, with which we can control the behavior
of the BBVectorizePass without using command line options. As pointed out
by Hal, we can then ask TargetLoweringInfo for an architecture-specific
VectorizeConfig and perform vectorization with architecture-specific
information.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154096 91177308-0d34-0410-b5e6-96231b3b80d8
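The new entry points take a VectorizeConfig explicitly, so clients can tune BBVectorize without touching the global cl::opt flags. Below is a minimal sketch of such a caller; the header path (llvm/Transforms/Vectorize.h) and the pass-manager boilerplate are assumptions, since the declaration side of the change lies outside lib/Transforms and is not part of this diff:

```cpp
// Sketch only: assumes the VectorizeConfig/createBBVectorizePass declarations
// live in llvm/Transforms/Vectorize.h (not shown in this diff).
#include "llvm/PassManager.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

static void addTunedBBVectorize(PassManagerBase &PM) {
  VectorizeConfig C;     // default-constructed: mirrors the cl::opt defaults
  C.VectorBits = 256;    // e.g. target a 256-bit vector register
  C.NoMemOps = true;     // skip load/store vectorization entirely
  C.ReqChainDepth = 3;   // accept shorter pairable chains
  PM.add(createBBVectorizePass(C));  // overload introduced by this patch
}
```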
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Vectorize/BBVectorize.cpp | 92 |
1 file changed, 60 insertions, 32 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 5abb242..7d5bb31 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -140,11 +140,16 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
 namespace {
   struct BBVectorize : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
-    BBVectorize() : BasicBlockPass(ID) {
+
+    VectorizeConfig Config;
+
+    BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+      : BasicBlockPass(ID), Config(C) {
       initializeBBVectorizePass(*PassRegistry::getPassRegistry());
     }
 
-    BBVectorize(Pass *P) : BasicBlockPass(ID) {
+    BBVectorize(Pass *P, const VectorizeConfig &C)
+      : BasicBlockPass(ID), Config(C) {
       AA = &P->getAnalysis<AliasAnalysis>();
       SE = &P->getAnalysis<ScalarEvolution>();
       TD = P->getAnalysisIfAvailable<TargetData>();
@@ -291,9 +296,10 @@ namespace {
       // Iterate a sufficient number of times to merge types of size 1 bit,
       // then 2 bits, then 4, etc. up to half of the target vector width of the
       // target vector register.
-      for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
+      for (unsigned v = 2, n = 1;
+           v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
            v *= 2, ++n) {
-        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
               " for " << BB.getName() << " in " <<
               BB.getParent()->getName() << "...\n");
         if (vectorizePairs(BB))
@@ -343,7 +349,7 @@ namespace {
     // candidate chains where longer chains are considered to be better.
     // Note: when this function returns 0, the resulting instructions are
     // not actually fused.
-    static inline size_t getDepthFactor(Value *V) {
+    inline size_t getDepthFactor(Value *V) {
       // InsertElement and ExtractElement have a depth factor of zero. This is
       // for two reasons: First, they cannot be usefully fused. Second, because
       // the pass generates a lot of these, they can confuse the simple metric
@@ -357,8 +363,8 @@ namespace {
 
       // Give a load or store half of the required depth so that load/store
       // pairs will vectorize.
-      if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
-        return ReqChainDepth/2;
+      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+        return Config.ReqChainDepth/2;
 
       return 1;
     }
@@ -431,9 +437,9 @@ namespace {
       case Intrinsic::exp:
       case Intrinsic::exp2:
       case Intrinsic::pow:
-        return !NoMath;
+        return !Config.NoMath;
       case Intrinsic::fma:
-        return !NoFMA;
+        return !Config.NoFMA;
       }
     }
 
@@ -527,16 +533,16 @@ namespace {
       } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
         // Vectorize simple loads if possbile:
         IsSimpleLoadStore = L->isSimple();
-        if (!IsSimpleLoadStore || NoMemOps)
+        if (!IsSimpleLoadStore || Config.NoMemOps)
           return false;
       } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
         // Vectorize simple stores if possbile:
         IsSimpleLoadStore = S->isSimple();
-        if (!IsSimpleLoadStore || NoMemOps)
+        if (!IsSimpleLoadStore || Config.NoMemOps)
           return false;
       } else if (CastInst *C = dyn_cast<CastInst>(I)) {
         // We can vectorize casts, but not casts of pointer types, etc.
-        if (NoCasts)
+        if (Config.NoCasts)
           return false;
 
         Type *SrcTy = C->getSrcTy();
@@ -576,14 +582,14 @@ namespace {
           !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
         return false;
 
-      if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+      if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
         return false;
 
-      if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+      if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
         return false;
 
-      if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
-          T2->getPrimitiveSizeInBits() > VectorBits/2)
+      if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+          T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
         return false;
 
       return true;
@@ -611,7 +617,7 @@ namespace {
             LI->isVolatile() != LJ->isVolatile() ||
             LI->getOrdering() != LJ->getOrdering() ||
             LI->getSynchScope() != LJ->getSynchScope())
-          return false;
+          return false;
       } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
         if (SI->getValueOperand()->getType() !=
               SJ->getValueOperand()->getType() ||
@@ -632,7 +638,7 @@ namespace {
         int64_t OffsetInElmts = 0;
         if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
               OffsetInElmts) && abs64(OffsetInElmts) == 1) {
-          if (AlignedOnly) {
+          if (Config.AlignedOnly) {
             Type *aType = isa<StoreInst>(I) ?
               cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
             // An aligned load or store is possible only if the instruction
@@ -753,12 +759,12 @@ namespace {
       AliasSetTracker WriteSet(*AA);
       bool JAfterStart = IAfterStart;
       BasicBlock::iterator J = llvm::next(I);
-      for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
+      for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
         if (J == Start) JAfterStart = true;
 
         // Determine if J uses I, if so, exit the loop.
-        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
-        if (FastDep) {
+        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+        if (Config.FastDep) {
           // Note: For this heuristic to be effective, independent operations
           // must tend to be intermixed. This is likely to be true from some
           // kinds of grouped loop unrolling (but not the generic LLVM pass),
@@ -796,7 +802,7 @@ namespace {
         // If we have already found too many pairs, break here and this function
         // will be called again starting after the last instruction selected
         // during this invocation.
-        if (PairableInsts.size() >= MaxInsts) {
+        if (PairableInsts.size() >= Config.MaxInsts) {
          ShouldContinue = true;
           break;
         }
@@ -841,7 +847,7 @@ namespace {
             ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
         }
 
-      if (SplatBreaksChain) continue;
+      if (Config.SplatBreaksChain) continue;
       // Look for cases where just the first value in the pair is used by
       // both members of another pair (splatting).
       for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
@@ -850,7 +856,7 @@ namespace {
         }
       }
 
-      if (SplatBreaksChain) return;
+      if (Config.SplatBreaksChain) return;
       // Look for cases where just the second value in the pair is used by
       // both members of another pair (splatting).
       for (Value::use_iterator I = P.second->use_begin(),
@@ -1280,7 +1286,7 @@ namespace {
             << *J->first << " <-> " << *J->second << "} of depth " <<
             MaxDepth << " and size " << PrunedTree.size() <<
             " (effective size: " << EffSize << ")\n");
-      if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
+      if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
         BestMaxDepth = MaxDepth;
         BestEffSize = EffSize;
         BestTree = PrunedTree;
@@ -1296,7 +1302,8 @@ namespace {
                     std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                     DenseSet<ValuePair> &PairableInstUsers,
                     DenseMap<Value *, Value *>& ChosenPairs) {
-      bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
+      bool UseCycleCheck =
+        CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
       std::multimap<ValuePair, ValuePair> PairableInstUserMap;
       for (std::vector<Value *>::iterator I = PairableInsts.begin(),
            E = PairableInsts.end(); I != E; ++I) {
@@ -1547,11 +1554,11 @@ namespace {
         unsigned IID = F->getIntrinsicID();
         if (o == NumOperands-1) {
           BasicBlock &BB = *I->getParent();
-
+
           Module *M = BB.getParent()->getParent();
           Type *ArgType = I->getType();
           Type *VArgType = getVecTypeForPair(ArgType);
-
+
           // FIXME: is it safe to do this here?
           ReplacedOperands[o] = Intrinsic::getDeclaration(M,
             (Intrinsic::ID) IID, VArgType);
@@ -1867,11 +1874,32 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
 
-BasicBlockPass *llvm::createBBVectorizePass() {
-  return new BBVectorize();
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+  return new BBVectorize(C);
 }
 
-bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
-  BBVectorize BBVectorizer(P);
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+  BBVectorize BBVectorizer(P, C);
   return BBVectorizer.vectorizeBB(BB);
 }
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+  VectorBits = ::VectorBits;
+  NoInts = ::NoInts;
+  NoFloats = ::NoFloats;
+  NoCasts = ::NoCasts;
+  NoMath = ::NoMath;
+  NoFMA = ::NoFMA;
+  NoMemOps = ::NoMemOps;
+  AlignedOnly = ::AlignedOnly;
+  ReqChainDepth= ::ReqChainDepth;
+  SearchLimit = ::SearchLimit;
+  MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+  SplatBreaksChain = ::SplatBreaksChain;
+  MaxInsts = ::MaxInsts;
+  MaxIter = ::MaxIter;
+  NoMemOpBoost = ::NoMemOpBoost;
+  FastDep = ::FastDep;
+}
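Similarly, vectorizeBasicBlock() now threads a VectorizeConfig through to the internal BBVectorize(Pass *, const VectorizeConfig &) constructor. A hedged sketch of a caller is below; only the vectorizeBasicBlock(Pass *, BasicBlock &, const VectorizeConfig &) signature comes from this patch, the wrapper pass itself is purely illustrative. Because the internal constructor fetches AliasAnalysis and ScalarEvolution from the calling pass, the caller must require those analyses:

```cpp
// Illustrative wrapper pass (not part of this commit) that runs the
// block vectorizer with an AlignedOnly configuration on every block.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

namespace {
  struct AlignedBlockVectorize : public FunctionPass {
    static char ID;
    AlignedBlockVectorize() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      // BBVectorize(Pass *, const VectorizeConfig &) pulls these analyses
      // from the calling pass, so they must be available here.
      AU.addRequired<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
    }

    virtual bool runOnFunction(Function &F) {
      VectorizeConfig C;
      C.AlignedOnly = true;  // only fuse memory ops with proven alignment
      bool Changed = false;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        Changed |= vectorizeBasicBlock(this, *BB, C);
      return Changed;
    }
  };
}

char AlignedBlockVectorize::ID = 0;
```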