diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-05-11 23:04:28 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-05-11 23:04:28 +0000 |
commit | 9b5d70f07630f99f1ec5589aeaba96c6d8ab0aee (patch) | |
tree | 7a6c6cc63565a5ad554578a33d5b854c723c0830 /lib/Transforms | |
parent | 6bb539a643b385520c25f3b03199f68012b9c09b (diff) | |
download | external_llvm-9b5d70f07630f99f1ec5589aeaba96c6d8ab0aee.zip external_llvm-9b5d70f07630f99f1ec5589aeaba96c6d8ab0aee.tar.gz external_llvm-9b5d70f07630f99f1ec5589aeaba96c6d8ab0aee.tar.bz2 |
LoopVectorize: Use the widest induction variable type
Use the widest induction type encountered for the cannonical induction variable.
We used to turn the following loop into an empty loop because we used i8 as
induction variable type and truncated 1024 to 0 as trip count.
int a[1024];
void fail() {
int reverse_induction = 1023;
unsigned char forward_induction = 0;
while ((reverse_induction) >= 0) {
forward_induction++;
a[reverse_induction] = forward_induction;
--reverse_induction;
}
}
radar://13862901
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181667 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 90 |
1 files changed, 69 insertions, 21 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c9275b2..0dd6abb1ae 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -312,6 +312,8 @@ private: PHINode *Induction; /// The induction variable of the old basic block. PHINode *OldInduction; + /// Holds the extended (to the widest induction type) start index. + Value *ExtendedIdx; /// Maps scalars to widened vectors. ValueMap WidenMap; }; @@ -335,7 +337,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -473,6 +475,9 @@ public: /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } + /// Returns the widest induction type. + Type *getWidestInductionType() { return WidestIndTy; } + /// Returns True if V is an induction variable in this loop. bool isInductionVariable(const Value *V); @@ -579,6 +584,8 @@ private: /// Notice that inductions don't need to start at zero and that induction /// variables can be pointers. InductionList Inductions; + /// Holds the widest induction type encountered. + Type *WidestIndTy; /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. @@ -1243,8 +1250,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // induction variables. In the code below we also support a case where we // don't have a single induction variable. OldInduction = Legal->getInduction(); - Type *IdxTy = OldInduction ? OldInduction->getType() : - DL->getIntPtrType(SE->getContext()); + Type *IdxTy = Legal->getWidestInductionType(); // Find the loop boundaries. const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch()); @@ -1265,9 +1271,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. - Value *StartIdx = OldInduction ? - OldInduction->getIncomingValueForBlock(BypassBlock): - ConstantInt::get(IdxTy, 0); + Builder.SetInsertPoint(BypassBlock->getTerminator()); + Value *StartIdx = ExtendedIdx = OldInduction ? + Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock), + IdxTy): + ConstantInt::get(IdxTy, 0); assert(BypassBlock && "Invalid loop structure"); LoopBypassBlocks.push_back(BypassBlock); @@ -1366,8 +1374,16 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (I = List->begin(), E = List->end(); I != E; ++I) { PHINode *OrigPhi = I->first; LoopVectorizationLegality::InductionInfo II = I->second; - PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val", + + Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType(); + PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val", MiddleBlock->getTerminator()); + // We might have extended the type of the induction variable but we need a + // truncated version for the scalar loop. + PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? + PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", + MiddleBlock->getTerminator()) : 0; + Value *EndValue = 0; switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: @@ -1376,6 +1392,17 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Handle the integer induction counter: assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); assert(OrigPhi == OldInduction && "Unknown integer PHI"); + if (OrigPhi == OldInduction) { + // Create a truncated version of the resume value for the scalar loop, + // we might have promoted the type to a larger width. + EndValue = + BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType()); + // The new PHI merges the original incoming value, in case of a bypass, + // or the value at the end of the vectorized loop. + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + TruncResumeVal->addIncoming(EndValue, VecBody); + } // We know what the end value is. EndValue = IdxEndRoundDown; // We also know which PHI node holds it. @@ -1412,13 +1439,21 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) - ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) { + if (OrigPhi == OldInduction) + ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); + else + ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + } ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); - OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + // The old inductions phi node in the scalar body needs the truncated value. + if (OrigPhi == OldInduction) + OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal); + else + OrigPhi->setIncomingValue(BlockIdx, ResumeVal); } // If we are generating a new induction variable then we also need to @@ -2022,7 +2057,9 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { assert(P == OldInduction && "Unexpected PHI"); - Value *Broadcasted = getBroadcastInstrs(Induction); + // We might have had to extend the type. + Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); + Value *Broadcasted = getBroadcastInstrs(Trunc); // After broadcasting the induction variable we need to make the // vector consecutive by adding 0, 1, 2 ... for (unsigned part = 0; part < UF; ++part) @@ -2033,16 +2070,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_PtrInduction: case LoopVectorizationLegality::IK_ReversePtrInduction: // Handle reverse integer and pointer inductions. - Value *StartIdx = 0; - // If we have a single integer induction variable then use it. - // Otherwise, start counting at zero. - if (OldInduction) { - LoopVectorizationLegality::InductionInfo OldII = - Legal->getInductionVars()->lookup(OldInduction); - StartIdx = OldII.StartValue; - } else { - StartIdx = ConstantInt::get(Induction->getType(), 0); - } + Value *StartIdx = ExtendedIdx; // This is the normalized GEP that starts counting at zero. Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, "normalized.idx"); @@ -2362,6 +2390,20 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) { + if (Ty->isPointerTy()) + return DL.getIntPtrType(Ty->getContext()); + return Ty; +} + +static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) { + Ty0 = convertPointerToIntegerType(DL, Ty0); + Ty1 = convertPointerToIntegerType(DL, Ty1); + if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) + return Ty0; + return Ty1; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); @@ -2416,6 +2458,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { InductionKind IK = isInductionVariable(Phi); if (IK_NoInduction != IK) { + // Get the widest type. + if (!WidestIndTy) + WidestIndTy = convertPointerToIntegerType(*DL, PhiTy); + else + WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); + // Int inductions are special because we only allow one IV. if (IK == IK_IntInduction) { if (Induction) { |