diff options
Diffstat (limited to 'lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/SLPVectorizer.cpp | 257 |
1 files changed, 156 insertions, 101 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ee32227..e13ba95 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,9 +15,6 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. // //===----------------------------------------------------------------------===// -#define SV_NAME "slp-vectorizer" -#define DEBUG_TYPE "SLP" - #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" @@ -34,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -41,11 +39,15 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include <algorithm> #include <map> using namespace llvm; +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE "SLP" + static cl::opt<int> SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " @@ -72,8 +74,6 @@ struct BlockNumbering { BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} - BlockNumbering() : BB(0), Valid(false) {} - void numberInstructions() { unsigned Loc = 0; InstrIdx.clear(); @@ -120,15 +120,15 @@ private: static BasicBlock *getSameBlock(ArrayRef<Value *> VL) { Instruction *I0 = dyn_cast<Instruction>(VL[0]); if (!I0) - return 0; + return nullptr; BasicBlock *BB = I0->getParent(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast<Instruction>(VL[i]); if (!I) - return 0; + return nullptr; if (BB != I->getParent()) - return 0; + return nullptr; } return BB; } @@ -180,7 +180,7 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) { switch (Kind) { default: - MD = 0; // Remove unknown metadata + MD = nullptr; // Remove unknown metadata break; case LLVMContext::MD_tbaa: MD = MDNode::getMostGenericTBAA(MD, IMD); @@ -201,7 +201,7 @@ static Type* getSameType(ArrayRef<Value *> VL) { Type *Ty = VL[0]->getType(); for (int i = 1, e = VL.size(); i < e; i++) if (VL[i]->getType() != Ty) - return 0; + return nullptr; return Ty; } @@ -345,17 +345,10 @@ public: typedef SmallVector<StoreInst *, 8> StoreList; BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, - DominatorTree *Dt) : - F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), - Builder(Se->getContext()) { - // Setup the block numbering utility for all of the blocks in the - // function. - for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { - BasicBlock *BB = it; - BlocksNumbers[BB] = BlockNumbering(BB); - } - } + TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, + LoopInfo *Li, DominatorTree *Dt) + : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), + Builder(Se->getContext()) {} /// \brief Vectorize the tree that starts with the elements in \p VL. /// Returns the vectorized root. @@ -365,13 +358,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots and is possibly - /// used by a reduction of \p RdxOps. - void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for + /// the purpose of scheduling and extraction in the \p UserIgnoreLst. + void buildTree(ArrayRef<Value *> Roots, + ArrayRef<Value *> UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { - RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -446,7 +439,7 @@ private: bool isFullyVectorizableTinyTree(); struct TreeEntry { - TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0), + TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0), NeedToGather(0) {} /// \returns true if the scalars in VL are equal to this entry. @@ -527,14 +520,22 @@ private: /// Numbers instructions in different blocks. DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers; - /// Reduction operators. - ValueSet *RdxOps; + /// \brief Get the corresponding instruction numbering list for a given + /// BasicBlock. The list is allocated lazily. + BlockNumbering &getBlockNumbering(BasicBlock *BB) { + auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB))); + return I.first->second; + } + + /// List of users to ignore during scheduling and that don't need extracting. + ArrayRef<Value *> UserIgnoreList; // Analysis and block reference. Function *F; ScalarEvolution *SE; const DataLayout *DL; TargetTransformInfo *TTI; + TargetLibraryInfo *TLI; AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; @@ -542,9 +543,10 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) { +void BoUpSLP::buildTree(ArrayRef<Value *> Roots, + ArrayRef<Value *> UserIgnoreLst) { deleteTree(); - RdxOps = Rdx; + UserIgnoreList = UserIgnoreLst; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) { if (!UserInst) continue; - // Ignore uses that are part of the reduction. - if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != + UserIgnoreList.end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << @@ -708,12 +711,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { continue; } - // This user is part of the reduction. - if (RdxOps && RdxOps->count(UI)) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) != + UserIgnoreList.end()) continue; // Make sure that we can schedule this unknown user. - BlockNumbering &BN = BlocksNumbers[BB]; + BlockNumbering &BN = getBlockNumbering(BB); int UserIndex = BN.getIndex(UI); if (UserIndex < MyLastIndex) { @@ -948,32 +952,36 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) { } case Instruction::Call: { // Check if the calls are all to the same vectorizable intrinsic. - IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]); - if (II==NULL) { + CallInst *CI = cast<CallInst>(VL[0]); + // Check if this is an Intrinsic call or something that can be + // represented by an intrinsic call + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + if (!isTriviallyVectorizable(ID)) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } - Function *Int = II->getCalledFunction(); + Function *Int = CI->getCalledFunction(); for (unsigned i = 1, e = VL.size(); i != e; ++i) { - IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]); - if (!II2 || II2->getCalledFunction() != Int) { + CallInst *CI2 = dyn_cast<CallInst>(VL[i]); + if (!CI2 || CI2->getCalledFunction() != Int || + getIntrinsicIDForCall(CI2, TLI) != ID) { newTreeEntry(VL, false); - DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i] + DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); return; } } newTreeEntry(VL, true); - for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) { + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. for (unsigned j = 0; j < VL.size(); ++j) { - IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]); - Operands.push_back(II2->getArgOperand(i)); + CallInst *CI2 = dyn_cast<CallInst>(VL[j]); + Operands.push_back(CI2->getArgOperand(i)); } buildTree_rec(Operands, Depth + 1); } @@ -1090,7 +1098,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // If instead not all operands are constants, then set the operand kind // to OK_AnyValue. If all operands are constants but not the same, // then set the operand kind to OK_NonUniformConstantValue. - ConstantInt *CInt = NULL; + ConstantInt *CInt = nullptr; for (unsigned i = 0; i < VL.size(); ++i) { const Instruction *I = cast<Instruction>(VL[i]); if (!isa<ConstantInt>(I->getOperand(1))) { @@ -1129,12 +1137,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } case Instruction::Call: { CallInst *CI = cast<CallInst>(VL0); - IntrinsicInst *II = cast<IntrinsicInst>(CI); - Intrinsic::ID ID = II->getIntrinsicID(); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); // Calculate the cost of the scalar and vector calls. SmallVector<Type*, 4> ScalarTys, VecTys; - for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) { + for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) { ScalarTys.push_back(CI->getArgOperand(op)->getType()); VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), VecTy->getNumElements())); @@ -1147,7 +1154,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost << " (" << VecCallCost << "-" << ScalarCallCost << ")" - << " for " << *II << "\n"); + << " for " << *CI << "\n"); return VecCallCost - ScalarCallCost; } @@ -1244,7 +1251,7 @@ Value *BoUpSLP::getPointerOperand(Value *I) { return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); - return 0; + return nullptr; } unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { @@ -1318,13 +1325,13 @@ Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) { if (!A.Ptr || !B.Ptr || AA->alias(A, B)) return I; } - return 0; + return nullptr; } int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) { BasicBlock *BB = cast<Instruction>(VL[0])->getParent(); - assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); - BlockNumbering &BN = BlocksNumbers[BB]; + assert(BB == getSameBlock(VL) && "Invalid block"); + BlockNumbering &BN = getBlockNumbering(BB); int MaxIdx = BN.getIndex(BB->getFirstNonPHI()); for (unsigned i = 0, e = VL.size(); i < e; ++i) @@ -1334,8 +1341,8 @@ int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) { Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) { BasicBlock *BB = cast<Instruction>(VL[0])->getParent(); - assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); - BlockNumbering &BN = BlocksNumbers[BB]; + assert(BB == getSameBlock(VL) && "Invalid block"); + BlockNumbering &BN = getBlockNumbering(BB); int MaxIdx = BN.getIndex(cast<Instruction>(VL[0])); for (unsigned i = 1, e = VL.size(); i < e; ++i) @@ -1394,7 +1401,7 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const { if (En->isSame(VL) && En->VectorizedValue) return En->VectorizedValue; } - return 0; + return nullptr; } Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) { @@ -1615,6 +1622,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { VecTy->getPointerTo(AS)); unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); + if (!Alignment) + Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType()); LI->setAlignment(Alignment); E->VectorizedValue = LI; return propagateMetadata(LI, E->Scalars); @@ -1634,13 +1643,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo(AS)); StoreInst *S = Builder.CreateStore(VecValue, VecPtr); + if (!Alignment) + Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType()); S->setAlignment(Alignment); E->VectorizedValue = S; return propagateMetadata(S, E->Scalars); } case Instruction::Call: { CallInst *CI = cast<CallInst>(VL0); - setInsertPointAfterBundle(E->Scalars); std::vector<Value *> OpVecs; for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { @@ -1656,8 +1666,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Module *M = F->getParent(); - IntrinsicInst *II = cast<IntrinsicInst>(CI); - Intrinsic::ID ID = II->getIntrinsicID(); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; Function *CF = Intrinsic::getDeclaration(M, ID, Tys); Value *V = Builder.CreateCall(CF, OpVecs); @@ -1667,7 +1676,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { default: llvm_unreachable("unknown inst"); } - return 0; + return nullptr; } Value *BoUpSLP::vectorizeTree() { @@ -1746,8 +1755,9 @@ Value *BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); assert((ScalarToTreeEntry.count(U) || - // It is legal to replace the reduction users by undef. - (RdxOps && RdxOps->count(U))) && + // It is legal to replace users in the ignorelist by undef. + (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != + UserIgnoreList.end())) && "Replacing out-of-tree value with undef"); } #endif @@ -1759,9 +1769,9 @@ Value *BoUpSLP::vectorizeTree() { } } - for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { - BlocksNumbers[it].forget(); - } + for (auto &BN : BlocksNumbers) + BN.second.forget(); + Builder.ClearInsertionPoint(); return VectorizableTree[0].VectorizedValue; @@ -1802,11 +1812,19 @@ void BoUpSLP::optimizeGatherSequence() { Insert->moveBefore(PreHeader->getTerminator()); } + // Make a list of all reachable blocks in our CSE queue. + SmallVector<const DomTreeNode *, 8> CSEWorkList; + CSEWorkList.reserve(CSEBlocks.size()); + for (BasicBlock *BB : CSEBlocks) + if (DomTreeNode *N = DT->getNode(BB)) { + assert(DT->isReachableFromEntry(N)); + CSEWorkList.push_back(N); + } + // Sort blocks by domination. This ensures we visit a block after all blocks // dominating it are visited. - SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end()); std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), - [this](const BasicBlock *A, const BasicBlock *B) { + [this](const DomTreeNode *A, const DomTreeNode *B) { return DT->properlyDominates(A, B); }); @@ -1814,12 +1832,10 @@ void BoUpSLP::optimizeGatherSequence() { // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallVector<Instruction *, 16> Visited; - for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(), - E = CSEWorkList.end(); - I != E; ++I) { + for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && "Worklist not sorted properly!"); - BasicBlock *BB = *I; + BasicBlock *BB = (*I)->getBlock(); // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = it++; @@ -1835,7 +1851,7 @@ void BoUpSLP::optimizeGatherSequence() { DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); In->eraseFromParent(); - In = 0; + In = nullptr; break; } } @@ -1864,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass { ScalarEvolution *SE; const DataLayout *DL; TargetTransformInfo *TTI; + TargetLibraryInfo *TLI; AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; @@ -1874,8 +1891,9 @@ struct SLPVectorizer : public FunctionPass { SE = &getAnalysis<ScalarEvolution>(); DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = &getAnalysis<TargetTransformInfo>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); AA = &getAnalysis<AliasAnalysis>(); LI = &getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -1900,8 +1918,8 @@ struct SLPVectorizer : public FunctionPass { DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); // Use the bottom up slp vectorizer to construct chains that start with - // he store instructions. - BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT); + // store instructions. + BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT); // Scan the blocks in the function in post order. for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()), @@ -1951,8 +1969,11 @@ private: bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R); /// \brief Try to vectorize a list of operands. + /// \@param BuildVector A list of users to ignore for the purpose of + /// scheduling and that don't need extracting. /// \returns true if a value was vectorized. - bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R); + bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, + ArrayRef<Value *> BuildVector = None); /// \brief Try to vectorize a chain that may start at the operands of \V; bool tryToVectorize(BinaryOperator *V, BoUpSLP &R); @@ -2106,7 +2127,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { // Check that the pointer points to scalars. Type *Ty = SI->getValueOperand()->getType(); if (Ty->isAggregateType() || Ty->isVectorTy()) - return 0; + continue; // Find the base pointer. Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL); @@ -2125,7 +2146,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) { return tryToVectorizeList(VL, R); } -bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { +bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, + ArrayRef<Value *> BuildVector) { if (VL.size() < 2) return false; @@ -2153,7 +2175,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { bool Changed = false; - // Keep track of values that were delete by vectorizing in the loop below. + // Keep track of values that were deleted by vectorizing in the loop below. SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end()); for (unsigned i = 0, e = VL.size(); i < e; ++i) { @@ -2175,13 +2197,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) { << "\n"); ArrayRef<Value *> Ops = VL.slice(i, OpsWidth); - R.buildTree(Ops); + ArrayRef<Value *> BuildVectorSlice; + if (!BuildVector.empty()) + BuildVectorSlice = BuildVector.slice(i, OpsWidth); + + R.buildTree(Ops, BuildVectorSlice); int Cost = R.getTreeCost(); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n"); - R.vectorizeTree(); - + Value *VectorizedRoot = R.vectorizeTree(); + + // Reconstruct the build vector by extracting the vectorized root. This + // way we handle the case where some elements of the vector are undefined. + // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2)) + if (!BuildVectorSlice.empty()) { + // The insert point is the last build vector instruction. The vectorized + // root will precede it. This guarantees that we get an instruction. The + // vectorized tree could have been constant folded. + Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back()); + unsigned VecIdx = 0; + for (auto &V : BuildVectorSlice) { + IRBuilder<true, NoFolder> Builder( + ++BasicBlock::iterator(InsertAfter)); + InsertElementInst *IE = cast<InsertElementInst>(V); + Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement( + VectorizedRoot, Builder.getInt32(VecIdx++))); + IE->setOperand(1, Extract); + IE->removeFromParent(); + IE->insertAfter(Extract); + InsertAfter = IE; + } + } // Move to the next bundle. i += VF - 1; Changed = true; @@ -2290,7 +2337,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, /// *p = /// class HorizontalReduction { - SmallPtrSet<Value *, 16> ReductionOps; + SmallVector<Value *, 16> ReductionOps; SmallVector<Value *, 32> ReducedVals; BinaryOperator *ReductionRoot; @@ -2308,7 +2355,7 @@ class HorizontalReduction { public: HorizontalReduction() - : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0), + : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0), ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} /// \brief Try to find a reduction tree. @@ -2323,10 +2370,10 @@ public: // In such a case start looking for a tree rooted in the first '+'. if (Phi) { if (B->getOperand(0) == Phi) { - Phi = 0; + Phi = nullptr; B = dyn_cast<BinaryOperator>(B->getOperand(1)); } else if (B->getOperand(1) == Phi) { - Phi = 0; + Phi = nullptr; B = dyn_cast<BinaryOperator>(B->getOperand(0)); } } @@ -2384,7 +2431,7 @@ public: // We need to be able to reassociate the adds. if (!TreeN->isAssociative()) return false; - ReductionOps.insert(TreeN); + ReductionOps.push_back(TreeN); } // Retract. Stack.pop_back(); @@ -2412,7 +2459,7 @@ public: if (NumReducedVals < ReduxWidth) return false; - Value *VectorizedTree = 0; + Value *VectorizedTree = nullptr; IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; Unsafe.setUnsafeAlgebra(); @@ -2421,7 +2468,7 @@ public: for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, &ReductionOps); + V.buildTree(ValsToReduce, ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -2455,13 +2502,13 @@ public: } // Update users. if (ReductionPHI) { - assert(ReductionRoot != NULL && "Need a reduction operation"); + assert(ReductionRoot && "Need a reduction operation"); ReductionRoot->setOperand(0, VectorizedTree); ReductionRoot->setOperand(1, ReductionPHI); } else ReductionRoot->replaceAllUsesWith(VectorizedTree); } - return VectorizedTree != 0; + return VectorizedTree != nullptr; } private: @@ -2540,13 +2587,16 @@ private: /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *IE, - SmallVectorImpl<Value *> &Ops) { - if (!isa<UndefValue>(IE->getOperand(0))) +static bool findBuildVector(InsertElementInst *FirstInsertElem, + SmallVectorImpl<Value *> &BuildVector, + SmallVectorImpl<Value *> &BuildVectorOpds) { + if (!isa<UndefValue>(FirstInsertElem->getOperand(0))) return false; + InsertElementInst *IE = FirstInsertElem; while (true) { - Ops.push_back(IE->getOperand(1)); + BuildVector.push_back(IE); + BuildVectorOpds.push_back(IE->getOperand(1)); if (IE->use_empty()) return false; @@ -2641,7 +2691,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { Value *Rdx = (P->getIncomingBlock(0) == BB ? (P->getIncomingValue(0)) - : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0)); + : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) + : nullptr)); // Check if this is a Binary Operator. BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx); if (!BI) @@ -2680,7 +2731,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(SI->getValueOperand())) { HorizontalReduction HorRdx; - if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) && + if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) && HorRdx.tryToReduce(R, TTI)) || tryToVectorize(BinOp, R))) { Changed = true; @@ -2716,12 +2767,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize trees that start at insertelement instructions. - if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) { - SmallVector<Value *, 8> Ops; - if (!findBuildVector(IE, Ops)) + if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) { + SmallVector<Value *, 16> BuildVector; + SmallVector<Value *, 16> BuildVectorOpds; + if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds)) continue; - if (tryToVectorizeList(Ops, R)) { + // Vectorize starting with the build vector operands ignoring the + // BuildVector instructions for the purpose of scheduling and user + // extraction. + if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) { Changed = true; it = BB->begin(); e = BB->end(); |