diff options
Diffstat (limited to 'lib/Transforms/Vectorize/BBVectorize.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/BBVectorize.cpp | 213 |
1 file changed, 170 insertions(+), 43 deletions(-)
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 32eec79..9d62306 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -84,6 +84,10 @@ NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values")); static cl::opt<bool> +NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize pointer values")); + +static cl::opt<bool> NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize casting (conversion) operations")); @@ -96,6 +100,14 @@ NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); static cl::opt<bool> +NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize select instructions")); + +static cl::opt<bool> +NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize getelementptr instructions")); + +static cl::opt<bool> NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores")); @@ -140,10 +152,21 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); namespace { struct BBVectorize : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - BBVectorize() : BasicBlockPass(ID) { + + const VectorizeConfig Config; + + BBVectorize(const VectorizeConfig &C = VectorizeConfig()) + : BasicBlockPass(ID), Config(C) { initializeBBVectorizePass(*PassRegistry::getPassRegistry()); } + BBVectorize(Pass *P, const VectorizeConfig &C) + : BasicBlockPass(ID), Config(C) { + AA = &P->getAnalysis<AliasAnalysis>(); + SE = &P->getAnalysis<ScalarEvolution>(); + TD = P->getAnalysisIfAvailable<TargetData>(); + } + typedef std::pair<Value *, Value *> ValuePair; typedef std::pair<ValuePair, size_t> 
ValuePairWithDepth; typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair @@ -280,18 +303,15 @@ namespace { Instruction *&InsertionPt, Instruction *I, Instruction *J); - virtual bool runOnBasicBlock(BasicBlock &BB) { - AA = &getAnalysis<AliasAnalysis>(); - SE = &getAnalysis<ScalarEvolution>(); - TD = getAnalysisIfAvailable<TargetData>(); - + bool vectorizeBB(BasicBlock &BB) { bool changed = false; // Iterate a sufficient number of times to merge types of size 1 bit, // then 2 bits, then 4, etc. up to half of the target vector width of the // target vector register. - for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter); + for (unsigned v = 2, n = 1; + v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter); v *= 2, ++n) { - DEBUG(dbgs() << "BBV: fusing loop #" << n << + DEBUG(dbgs() << "BBV: fusing loop #" << n << " for " << BB.getName() << " in " << BB.getParent()->getName() << "...\n"); if (vectorizePairs(BB)) @@ -304,6 +324,14 @@ namespace { return changed; } + virtual bool runOnBasicBlock(BasicBlock &BB) { + AA = &getAnalysis<AliasAnalysis>(); + SE = &getAnalysis<ScalarEvolution>(); + TD = getAnalysisIfAvailable<TargetData>(); + + return vectorizeBB(BB); + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { BasicBlockPass::getAnalysisUsage(AU); AU.addRequired<AliasAnalysis>(); @@ -333,7 +361,7 @@ namespace { // candidate chains where longer chains are considered to be better. // Note: when this function returns 0, the resulting instructions are // not actually fused. - static inline size_t getDepthFactor(Value *V) { + inline size_t getDepthFactor(Value *V) { // InsertElement and ExtractElement have a depth factor of zero. This is // for two reasons: First, they cannot be usefully fused. Second, because // the pass generates a lot of these, they can confuse the simple metric @@ -347,8 +375,8 @@ namespace { // Give a load or store half of the required depth so that load/store // pairs will vectorize. 
- if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) - return ReqChainDepth/2; + if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V))) + return Config.ReqChainDepth/2; return 1; } @@ -421,9 +449,9 @@ namespace { case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: - return !NoMath; + return Config.VectorizeMath; case Intrinsic::fma: - return !NoFMA; + return Config.VectorizeFMA; } } @@ -517,24 +545,34 @@ namespace { } else if (LoadInst *L = dyn_cast<LoadInst>(I)) { // Vectorize simple loads if possbile: IsSimpleLoadStore = L->isSimple(); - if (!IsSimpleLoadStore || NoMemOps) + if (!IsSimpleLoadStore || !Config.VectorizeMemOps) return false; } else if (StoreInst *S = dyn_cast<StoreInst>(I)) { // Vectorize simple stores if possbile: IsSimpleLoadStore = S->isSimple(); - if (!IsSimpleLoadStore || NoMemOps) + if (!IsSimpleLoadStore || !Config.VectorizeMemOps) return false; } else if (CastInst *C = dyn_cast<CastInst>(I)) { // We can vectorize casts, but not casts of pointer types, etc. - if (NoCasts) + if (!Config.VectorizeCasts) return false; Type *SrcTy = C->getSrcTy(); - if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy()) + if (!SrcTy->isSingleValueType()) return false; Type *DestTy = C->getDestTy(); - if (!DestTy->isSingleValueType() || DestTy->isPointerTy()) + if (!DestTy->isSingleValueType()) + return false; + } else if (isa<SelectInst>(I)) { + if (!Config.VectorizeSelect) + return false; + } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) { + if (!Config.VectorizeGEP) + return false; + + // Currently, vector GEPs exist only with one index. 
+ if (G->getNumIndices() != 1) return false; } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { @@ -566,14 +604,21 @@ namespace { !(VectorType::isValidElementType(T2) || T2->isVectorTy())) return false; - if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + if (!Config.VectorizeInts + && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + return false; + + if (!Config.VectorizeFloats + && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) return false; - if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) + if ((!Config.VectorizePointers || TD == 0) && + (T1->getScalarType()->isPointerTy() || + T2->getScalarType()->isPointerTy())) return false; - if (T1->getPrimitiveSizeInBits() > VectorBits/2 || - T2->getPrimitiveSizeInBits() > VectorBits/2) + if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 || + T2->getPrimitiveSizeInBits() > Config.VectorBits/2) return false; return true; @@ -601,7 +646,7 @@ namespace { LI->isVolatile() != LJ->isVolatile() || LI->getOrdering() != LJ->getOrdering() || LI->getSynchScope() != LJ->getSynchScope()) - return false; + return false; } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) { if (SI->getValueOperand()->getType() != SJ->getValueOperand()->getType() || @@ -622,7 +667,7 @@ namespace { int64_t OffsetInElmts = 0; if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts) && abs64(OffsetInElmts) == 1) { - if (AlignedOnly) { + if (Config.AlignedOnly) { Type *aType = isa<StoreInst>(I) ? cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); // An aligned load or store is possible only if the instruction @@ -647,6 +692,20 @@ namespace { // FIXME: We may want to vectorize non-constant shuffles also. } + // The powi intrinsic is special because only the first argument is + // vectorized, the second arguments must be equal. 
+ CallInst *CI = dyn_cast<CallInst>(I); + Function *FI; + if (CI && (FI = CI->getCalledFunction()) && + FI->getIntrinsicID() == Intrinsic::powi) { + + Value *A1I = CI->getArgOperand(1), + *A1J = cast<CallInst>(J)->getArgOperand(1); + const SCEV *A1ISCEV = SE->getSCEV(A1I), + *A1JSCEV = SE->getSCEV(A1J); + return (A1ISCEV == A1JSCEV); + } + return true; } @@ -729,12 +788,12 @@ namespace { AliasSetTracker WriteSet(*AA); bool JAfterStart = IAfterStart; BasicBlock::iterator J = llvm::next(I); - for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) { + for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { if (J == Start) JAfterStart = true; // Determine if J uses I, if so, exit the loop. - bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep); - if (FastDep) { + bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); + if (Config.FastDep) { // Note: For this heuristic to be effective, independent operations // must tend to be intermixed. This is likely to be true from some // kinds of grouped loop unrolling (but not the generic LLVM pass), @@ -772,7 +831,7 @@ namespace { // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected // during this invocation. - if (PairableInsts.size() >= MaxInsts) { + if (PairableInsts.size() >= Config.MaxInsts) { ShouldContinue = true; break; } @@ -796,16 +855,33 @@ namespace { std::vector<Value *> &PairableInsts, std::multimap<ValuePair, ValuePair> &ConnectedPairs, ValuePair P) { + StoreInst *SI, *SJ; + // For each possible pairing for this variable, look at the uses of // the first value... for (Value::use_iterator I = P.first->use_begin(), E = P.first->use_end(); I != E; ++I) { + if (isa<LoadInst>(*I)) { + // A pair cannot be connected to a load because the load only takes one + // operand (the address) and it is a scalar even after vectorization. 
+ continue; + } else if ((SI = dyn_cast<StoreInst>(*I)) && + P.first == SI->getPointerOperand()) { + // Similarly, a pair cannot be connected to a store through its + // pointer operand. + continue; + } + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); // For each use of the first variable, look for uses of the second // variable... for (Value::use_iterator J = P.second->use_begin(), E2 = P.second->use_end(); J != E2; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.second == SJ->getPointerOperand()) + continue; + VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); // Look for <I, J>: @@ -817,23 +893,37 @@ namespace { ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I))); } - if (SplatBreaksChain) continue; + if (Config.SplatBreaksChain) continue; // Look for cases where just the first value in the pair is used by // both members of another pair (splatting). for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.first == SJ->getPointerOperand()) + continue; + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); } } - if (SplatBreaksChain) return; + if (Config.SplatBreaksChain) return; // Look for cases where just the second value in the pair is used by // both members of another pair (splatting). 
for (Value::use_iterator I = P.second->use_begin(), E = P.second->use_end(); I != E; ++I) { + if (isa<LoadInst>(*I)) + continue; + else if ((SI = dyn_cast<StoreInst>(*I)) && + P.second == SI->getPointerOperand()) + continue; + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.second == SJ->getPointerOperand()) + continue; + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); } @@ -1256,7 +1346,7 @@ namespace { << *J->first << " <-> " << *J->second << "} of depth " << MaxDepth << " and size " << PrunedTree.size() << " (effective size: " << EffSize << ")\n"); - if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) { + if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { BestMaxDepth = MaxDepth; BestEffSize = EffSize; BestTree = PrunedTree; @@ -1272,7 +1362,8 @@ namespace { std::multimap<ValuePair, ValuePair> &ConnectedPairs, DenseSet<ValuePair> &PairableInstUsers, DenseMap<Value *, Value *>& ChosenPairs) { - bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck; + bool UseCycleCheck = + CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; std::multimap<ValuePair, ValuePair> PairableInstUserMap; for (std::vector<Value *>::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { @@ -1518,19 +1609,27 @@ namespace { ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o, FlipMemInputs); continue; - } else if (isa<CallInst>(I) && o == NumOperands-1) { + } else if (isa<CallInst>(I)) { Function *F = cast<CallInst>(I)->getCalledFunction(); unsigned IID = F->getIntrinsicID(); - BasicBlock &BB = *I->getParent(); + if (o == NumOperands-1) { + BasicBlock &BB = *I->getParent(); - Module *M = BB.getParent()->getParent(); - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Module *M = BB.getParent()->getParent(); + 
Type *ArgType = I->getType(); + Type *VArgType = getVecTypeForPair(ArgType); - // FIXME: is it safe to do this here? - ReplacedOperands[o] = Intrinsic::getDeclaration(M, - (Intrinsic::ID) IID, VArgType); - continue; + // FIXME: is it safe to do this here? + ReplacedOperands[o] = Intrinsic::getDeclaration(M, + (Intrinsic::ID) IID, VArgType); + continue; + } else if (IID == Intrinsic::powi && o == 1) { + // The second argument of powi is a single integer and we've already + // checked that both arguments are equal. As a result, we just keep + // I's second argument. + ReplacedOperands[o] = I->getOperand(o); + continue; + } } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); continue; @@ -1835,7 +1934,35 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) -BasicBlockPass *llvm::createBBVectorizePass() { - return new BBVectorize(); +BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { + return new BBVectorize(C); +} + +bool +llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { + BBVectorize BBVectorizer(P, C); + return BBVectorizer.vectorizeBB(BB); } +//===----------------------------------------------------------------------===// +VectorizeConfig::VectorizeConfig() { + VectorBits = ::VectorBits; + VectorizeInts = !::NoInts; + VectorizeFloats = !::NoFloats; + VectorizePointers = !::NoPointers; + VectorizeCasts = !::NoCasts; + VectorizeMath = !::NoMath; + VectorizeFMA = !::NoFMA; + VectorizeSelect = !::NoSelect; + VectorizeGEP = !::NoGEP; + VectorizeMemOps = !::NoMemOps; + AlignedOnly = ::AlignedOnly; + ReqChainDepth= ::ReqChainDepth; + SearchLimit = ::SearchLimit; + MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; + SplatBreaksChain = ::SplatBreaksChain; + MaxInsts = ::MaxInsts; + MaxIter = ::MaxIter; + NoMemOpBoost = ::NoMemOpBoost; + 
FastDep = ::FastDep; +} |