aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-09-21 00:06:20 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-09-21 00:06:20 +0000
commit74d3482f76d1f8a20cedfc6701e017e7fd337cf9 (patch)
treeb316fb5892a85df71c30779e4c3e2522fff5206e /lib/Transforms/Vectorize/SLPVectorizer.cpp
parent9e0b08dd2053843fd330774cdbac06a7b0191f14 (diff)
downloadexternal_llvm-74d3482f76d1f8a20cedfc6701e017e7fd337cf9.zip
external_llvm-74d3482f76d1f8a20cedfc6701e017e7fd337cf9.tar.gz
external_llvm-74d3482f76d1f8a20cedfc6701e017e7fd337cf9.tar.bz2
Revert "SLPVectorizer: Handle more horizontal reductions (disabled)"
This reverts commit r191108. The horizontal.ll test case fails under libgmalloc. Thanks Shuxin for pointing this out to me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191121 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp376
1 files changed, 8 insertions, 368 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index caedd09..cd3f723 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -49,11 +49,6 @@ static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
"number "));
-
-static cl::opt<bool>
-ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
- cl::desc("Attempt to vectorize horizontal reductions"));
-
namespace {
static const unsigned MinVecRegSize = 128;
@@ -243,21 +238,17 @@ public:
}
/// \brief Vectorize the tree that starts with the elements in \p VL.
- /// Returns the vectorized root and the scalar operations the root was based
- /// on.
- std::pair<Value *, ValueList *> vectorizeTree();
+ void vectorizeTree();
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
int getTreeCost();
- /// Construct a vectorizable tree that starts at \p Roots and is possibly
- /// used by a reduction of \p RdxOps.
- void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+ /// Construct a vectorizable tree that starts at \p Roots.
+ void buildTree(ArrayRef<Value *> Roots);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
- RdxOps = 0;
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
@@ -410,9 +401,6 @@ private:
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
- /// Reduction operators.
- ValueSet *RdxOps;
-
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -425,9 +413,8 @@ private:
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots) {
deleteTree();
- RdxOps = Rdx;
if (!getSameType(Roots))
return;
buildTree_rec(Roots, 0);
@@ -458,12 +445,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
assert(!VectorizableTree[Idx].NeedToGather && "Bad state");
continue;
}
- Instruction *UserInst = dyn_cast<Instruction>(*User);
- if (!UserInst)
- continue;
- // Ignore uses that are part of the reduction.
- if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+ if (!isa<Instruction>(*User))
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << **User << " from lane " <<
@@ -595,10 +578,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
continue;
}
- // This user is part of the reduction.
- if (RdxOps && RdxOps->count(User))
- continue;
-
// Make sure that we can schedule this unknown user.
BlockNumbering &BN = BlocksNumbers[BB];
int UserIndex = BN.getIndex(User);
@@ -1393,7 +1372,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return 0;
}
-std::pair<Value *, BoUpSLP::ValueList *> BoUpSLP::vectorizeTree() {
+void BoUpSLP::vectorizeTree() {
Builder.SetInsertPoint(F->getEntryBlock().begin());
vectorizeTree(&VectorizableTree[0]);
@@ -1470,10 +1449,7 @@ std::pair<Value *, BoUpSLP::ValueList *> BoUpSLP::vectorizeTree() {
DEBUG(dbgs() << "SLP: \tvalidating user:" << **User << ".\n");
assert(!MustGather.count(*User) &&
"Replacing gathered value with undef");
-
- assert((ScalarToTreeEntry.count(*User) ||
- // It is legal to replace the reduction users by undef.
- (RdxOps && RdxOps->count(*User))) &&
+ assert(ScalarToTreeEntry.count(*User) &&
"Replacing out-of-tree value with undef");
}
Value *Undef = UndefValue::get(Ty);
@@ -1488,9 +1464,6 @@ std::pair<Value *, BoUpSLP::ValueList *> BoUpSLP::vectorizeTree() {
BlocksNumbers[it].forget();
}
Builder.ClearInsertionPoint();
-
- return std::make_pair(VectorizableTree[0].VectorizedValue,
- &VectorizableTree[0].Scalars);
}
void BoUpSLP::optimizeGatherSequence() {
@@ -1914,310 +1887,6 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
return 0;
}
-/// \brief Generate a shuffle mask to be used in a reduction tree.
-///
-/// \param VecLen The length of the vector to be reduced.
-/// \param NumEltsToRdx The number of elements that should be reduced in the
-/// vector.
-/// \param IsPairwise Whether the reduction is a pairwise or splitting
-/// reduction. A pairwise reduction will generate a mask of
-/// <0,2,...> or <1,3,..> while a splitting reduction will generate
-/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
-/// \param IsLeft True will generate a mask of even elements, odd otherwise.
-static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
- bool IsPairwise, bool IsLeft,
- IRBuilder<> &Builder) {
- assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask");
-
- SmallVector<Constant *, 32> ShuffleMask(
- VecLen, UndefValue::get(Builder.getInt32Ty()));
-
- if (IsPairwise)
- // Build a mask of 0, 2, ... (left) or 1, 3, ... (right).
- for (unsigned i = 0; i != NumEltsToRdx; ++i)
- ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft);
- else
- // Move the upper half of the vector to the lower half.
- for (unsigned i = 0; i != NumEltsToRdx; ++i)
- ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i);
-
- return ConstantVector::get(ShuffleMask);
-}
-
-
-/// Model horizontal reductions.
-///
-/// A horizontal reduction is a tree of reduction operations (currently add and
-/// fadd) that has operations that can be put into a vector as its leaf.
-/// For example, this tree:
-///
-/// mul mul mul mul
-/// \ / \ /
-/// + +
-/// \ /
-/// +
-/// This tree has "mul" as its reduced values and "+" as its reduction
-/// operations. A reduction might be feeding into a store or a binary operation
-/// feeding a phi.
-/// ...
-/// \ /
-/// +
-/// \
-/// phi +=
-///
-/// Or:
-/// ...
-/// \ /
-/// +
-/// \
-/// *p =
-///
-class HorizontalReduction {
- SmallPtrSet<Value *, 16> ReductionOps;
- SmallVector<Value *, 32> ReducedVals;
-
- BinaryOperator *ReductionRoot;
- PHINode *ReductionPHI;
-
- /// The opcode of the reduction.
- unsigned ReductionOpcode;
- /// The opcode of the values we perform a reduction on.
- unsigned ReducedValueOpcode;
- /// The width of one full horizontal reduction operation.
- unsigned ReduxWidth;
- /// Should we model this reduction as a pairwise reduction tree or a tree that
- /// splits the vector in halves and adds those halves.
- bool IsPairwiseReduction;
-
-public:
- HorizontalReduction()
- : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
- ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
-
- /// \brief Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B,
- DataLayout *DL) {
- assert((!Phi ||
- std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
- "Thi phi needs to use the binary operator");
-
- // We could have a initial reductions that is not an add.
- // r *= v1 + v2 + v3 + v4
- // In such a case start looking for a tree rooted in the first '+'.
- if (Phi) {
- if (B->getOperand(0) == Phi) {
- Phi = 0;
- B = dyn_cast<BinaryOperator>(B->getOperand(1));
- } else if (B->getOperand(1) == Phi) {
- Phi = 0;
- B = dyn_cast<BinaryOperator>(B->getOperand(0));
- }
- }
-
- if (!B)
- return false;
-
- Type *Ty = B->getType();
- if (Ty->isVectorTy())
- return false;
-
- ReductionOpcode = B->getOpcode();
- ReducedValueOpcode = 0;
- ReduxWidth = MinVecRegSize / DL->getTypeSizeInBits(Ty);
- ReductionRoot = B;
- ReductionPHI = Phi;
-
- if (ReduxWidth < 4)
- return false;
-
- // We currently only support adds.
- if (ReductionOpcode != Instruction::Add &&
- ReductionOpcode != Instruction::FAdd)
- return false;
-
- // Post order traverse the reduction tree starting at B. We only handle true
- // trees containing only binary operators.
- SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
- Stack.push_back(std::make_pair(B, 0));
- while (!Stack.empty()) {
- BinaryOperator *TreeN = Stack.back().first;
- unsigned EdgeToVist = Stack.back().second++;
- bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
-
- // Only handle trees in the current basic block.
- if (TreeN->getParent() != B->getParent())
- return false;
-
- // Each tree node needs to have one user except for the ultimate
- // reduction.
- if (!TreeN->hasOneUse() && TreeN != B)
- return false;
-
- // Postorder vist.
- if (EdgeToVist == 2 || IsReducedValue) {
- if (IsReducedValue) {
- // Make sure that the opcodes of the operations that we are going to
- // reduce match.
- if (!ReducedValueOpcode)
- ReducedValueOpcode = TreeN->getOpcode();
- else if (ReducedValueOpcode != TreeN->getOpcode())
- return false;
- ReducedVals.push_back(TreeN);
- } else {
- // We need to be able to reassociate the adds.
- if (!TreeN->isAssociative())
- return false;
- ReductionOps.insert(TreeN);
- }
- // Retract.
- Stack.pop_back();
- continue;
- }
-
- // Visit left or right.
- Value *NextV = TreeN->getOperand(EdgeToVist);
- BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
- if (Next)
- Stack.push_back(std::make_pair(Next, 0));
- else if (NextV != Phi)
- return false;
- }
- return true;
- }
-
- /// \brief Attempt to vectorize the tree found by
- /// matchAssociativeReduction.
- bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
- if (ReducedVals.empty())
- return false;
-
- unsigned NumReducedVals = ReducedVals.size();
- if (NumReducedVals < ReduxWidth)
- return false;
-
- Value *VectorizedTree = 0;
- IRBuilder<> Builder(ReductionRoot);
- FastMathFlags Unsafe;
- Unsafe.setUnsafeAlgebra();
- Builder.SetFastMathFlags(Unsafe);
- unsigned i = 0;
-
- for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
- ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
- V.buildTree(ValsToReduce, &ReductionOps);
-
- // Estimate cost.
- int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
- if (Cost >= -SLPCostThreshold)
- break;
-
- DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:" << Cost
- << ". (HorRdx)\n");
-
- // Vectorize a tree.
- Value *VectorizedRoot;
- BoUpSLP::ValueList *Scalars;
- tie(VectorizedRoot, Scalars) = V.vectorizeTree();
-
- // Emit a reduction.
- Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
- if (VectorizedTree) {
- Builder.SetCurrentDebugLocation(
- cast<Instruction>((*Scalars)[0])->getDebugLoc());
- VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
- ReducedSubTree, "bin.rdx");
- } else
- VectorizedTree = ReducedSubTree;
- }
-
- if (VectorizedTree) {
- // Finish the reduction.
- for (; i < NumReducedVals; ++i) {
- Builder.SetCurrentDebugLocation(
- cast<Instruction>(ReducedVals[i])->getDebugLoc());
- VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
- ReducedVals[i]);
- }
- // Update users.
- if (ReductionPHI) {
- assert(ReductionRoot != NULL && "Need a reduction operation");
- ReductionRoot->setOperand(0, VectorizedTree);
- ReductionRoot->setOperand(1, ReductionPHI);
- } else
- ReductionRoot->replaceAllUsesWith(VectorizedTree);
- }
- return VectorizedTree != 0;
- }
-
-private:
-
- /// \brief Calcuate the cost of a reduction.
- int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
- Type *ScalarTy = FirstReducedVal->getType();
- Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
-
- int PairwiseRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, true);
- int SplittingRdxCost = TTI->getReductionCost(ReductionOpcode, VecTy, false);
-
- IsPairwiseReduction = PairwiseRdxCost < SplittingRdxCost;
- int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
-
- int ScalarReduxCost =
- ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
-
- DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
- << " for reduction that starts with " << *FirstReducedVal
- << " (It is a "
- << (IsPairwiseReduction ? "pairwise" : "splitting")
- << " reduction)\n");
-
- return VecReduxCost - ScalarReduxCost;
- }
-
- static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
- Value *R, const Twine &Name = "") {
- if (Opcode == Instruction::FAdd)
- return Builder.CreateFAdd(L, R, Name);
- return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
- }
-
- /// \brief Emit a horizontal reduction of the vectorized value.
- Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
- assert(VectorizedValue && "Need to have a vectorized tree node");
- Instruction *ValToReduce = dyn_cast<Instruction>(VectorizedValue);
- assert(isPowerOf2_32(ReduxWidth) &&
- "We only handle power-of-two reductions for now");
-
- SmallVector<Constant *, 32> ShuffleMask(ReduxWidth, 0);
- Value *TmpVec = ValToReduce;
- for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
- if (IsPairwiseReduction) {
- Value *LeftMask =
- createRdxShuffleMask(ReduxWidth, i, true, true, Builder);
- Value *RightMask =
- createRdxShuffleMask(ReduxWidth, i, true, false, Builder);
-
- Value *LeftShuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l");
- Value *RightShuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
- "rdx.shuf.r");
- TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
- "bin.rdx");
- } else {
- Value *UpperHalf =
- createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
- Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
- TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
- }
- }
-
- // The result is in the first element of the vector.
- return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
- }
-};
-
/// \brief Recognize construction of vectors like
/// %ra = insertelement <4 x float> undef, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
@@ -2312,18 +1981,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (!BI)
continue;
- // Try to match and vectorize a horizontal reduction.
- HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- HorRdx.matchAssociativeReduction(P, BI, DL) &&
- HorRdx.tryToReduce(R, TTI)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
-
- Value *Inst = BI->getOperand(0);
+ Value *Inst = BI->getOperand(0);
if (Inst == P)
Inst = BI->getOperand(1);
@@ -2333,28 +1991,10 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
Changed = true;
it = BB->begin();
e = BB->end();
- continue;
}
-
continue;
}
- // Try to vectorize horizontal reductions feeding into a store.
- if (StoreInst *SI = dyn_cast<StoreInst>(it))
- if (BinaryOperator *BinOp =
- dyn_cast<BinaryOperator>(SI->getValueOperand())) {
- HorizontalReduction HorRdx;
- if (ShouldVectorizeHor &&
- ((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
- HorRdx.tryToReduce(R, TTI)) ||
- tryToVectorize(BinOp, R))) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
- }
-
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {