diff options
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 90 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/reverse_induction.ll | 69 |
2 files changed, 138 insertions, 21 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c9275b2..0dd6abb1ae 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -312,6 +312,8 @@ private: PHINode *Induction; /// The induction variable of the old basic block. PHINode *OldInduction; + /// Holds the extended (to the widest induction type) start index. + Value *ExtendedIdx; /// Maps scalars to widened vectors. ValueMap WidenMap; }; @@ -335,7 +337,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0), HasFunNoNaNAttr(false) {} + Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -473,6 +475,9 @@ public: /// Returns the induction variables found in the loop. InductionList *getInductionVars() { return &Inductions; } + /// Returns the widest induction type. + Type *getWidestInductionType() { return WidestIndTy; } + /// Returns True if V is an induction variable in this loop. bool isInductionVariable(const Value *V); @@ -579,6 +584,8 @@ private: /// Notice that inductions don't need to start at zero and that induction /// variables can be pointers. InductionList Inductions; + /// Holds the widest induction type encountered. + Type *WidestIndTy; /// Allowed outside users. This holds the reduction /// vars which can be accessed from outside the loop. @@ -1243,8 +1250,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // induction variables. In the code below we also support a case where we // don't have a single induction variable. OldInduction = Legal->getInduction(); - Type *IdxTy = OldInduction ? OldInduction->getType() : - DL->getIntPtrType(SE->getContext()); + Type *IdxTy = Legal->getWidestInductionType(); // Find the loop boundaries. const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch()); @@ -1265,9 +1271,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. - Value *StartIdx = OldInduction ? - OldInduction->getIncomingValueForBlock(BypassBlock): - ConstantInt::get(IdxTy, 0); + Builder.SetInsertPoint(BypassBlock->getTerminator()); + Value *StartIdx = ExtendedIdx = OldInduction ? + Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock), + IdxTy): + ConstantInt::get(IdxTy, 0); assert(BypassBlock && "Invalid loop structure"); LoopBypassBlocks.push_back(BypassBlock); @@ -1366,8 +1374,16 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { for (I = List->begin(), E = List->end(); I != E; ++I) { PHINode *OrigPhi = I->first; LoopVectorizationLegality::InductionInfo II = I->second; - PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val", + + Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType(); + PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val", MiddleBlock->getTerminator()); + // We might have extended the type of the induction variable but we need a + // truncated version for the scalar loop. + PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? + PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", + MiddleBlock->getTerminator()) : 0; + Value *EndValue = 0; switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: @@ -1376,6 +1392,17 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // Handle the integer induction counter: assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); assert(OrigPhi == OldInduction && "Unknown integer PHI"); + if (OrigPhi == OldInduction) { + // Create a truncated version of the resume value for the scalar loop, + // we might have promoted the type to a larger width. + EndValue = + BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType()); + // The new PHI merges the original incoming value, in case of a bypass, + // or the value at the end of the vectorized loop. + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + TruncResumeVal->addIncoming(EndValue, VecBody); + } // We know what the end value is. EndValue = IdxEndRoundDown; // We also know which PHI node holds it. @@ -1412,13 +1439,21 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) - ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) { + if (OrigPhi == OldInduction) + ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); + else + ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); + } ResumeVal->addIncoming(EndValue, VecBody); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); - OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + // The old inductions phi node in the scalar body needs the truncated value. + if (OrigPhi == OldInduction) + OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal); + else + OrigPhi->setIncomingValue(BlockIdx, ResumeVal); } // If we are generating a new induction variable then we also need to @@ -2022,7 +2057,9 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, llvm_unreachable("Unknown induction"); case LoopVectorizationLegality::IK_IntInduction: { assert(P == OldInduction && "Unexpected PHI"); - Value *Broadcasted = getBroadcastInstrs(Induction); + // We might have had to extend the type. + Value *Trunc = Builder.CreateTrunc(Induction, P->getType()); + Value *Broadcasted = getBroadcastInstrs(Trunc); // After broadcasting the induction variable we need to make the // vector consecutive by adding 0, 1, 2 ... for (unsigned part = 0; part < UF; ++part) @@ -2033,16 +2070,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case LoopVectorizationLegality::IK_PtrInduction: case LoopVectorizationLegality::IK_ReversePtrInduction: // Handle reverse integer and pointer inductions. - Value *StartIdx = 0; - // If we have a single integer induction variable then use it. - // Otherwise, start counting at zero. - if (OldInduction) { - LoopVectorizationLegality::InductionInfo OldII = - Legal->getInductionVars()->lookup(OldInduction); - StartIdx = OldII.StartValue; - } else { - StartIdx = ConstantInt::get(Induction->getType(), 0); - } + Value *StartIdx = ExtendedIdx; // This is the normalized GEP that starts counting at zero. Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx, "normalized.idx"); @@ -2362,6 +2390,20 @@ bool LoopVectorizationLegality::canVectorize() { return true; } +static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) { + if (Ty->isPointerTy()) + return DL.getIntPtrType(Ty->getContext()); + return Ty; +} + +static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) { + Ty0 = convertPointerToIntegerType(DL, Ty0); + Ty1 = convertPointerToIntegerType(DL, Ty1); + if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) + return Ty0; + return Ty1; +} + bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); @@ -2416,6 +2458,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { InductionKind IK = isInductionVariable(Phi); if (IK_NoInduction != IK) { + // Get the widest type. + if (!WidestIndTy) + WidestIndTy = convertPointerToIntegerType(*DL, PhiTy); + else + WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy); + // Int inductions are special because we only allow one IV. if (IK == IK_IntInduction) { if (Induction) { diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll index f43f02b..9e8c1b1 100644 --- a/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/test/Transforms/LoopVectorize/reverse_induction.ll @@ -77,3 +77,72 @@ loopend: } +@a = common global [1024 x i32] zeroinitializer, align 16 + +; We incorrectly transformed this loop into an empty one because we left the +; induction variable in i8 type and truncated the exit value 1024 to 0. +; int a[1024]; +; +; void fail() { +; int reverse_induction = 1023; +; unsigned char forward_induction = 0; +; while ((reverse_induction) >= 0) { +; forward_induction++; +; a[reverse_induction] = forward_induction; +; --reverse_induction; +; } +; } + +; CHECK: reverse_forward_induction_i64_i8 +; CHECK: vector.body +; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] +; CHECK: %normalized.idx = sub i64 %index, 0 +; CHECK: %reverse.idx = sub i64 1023, %normalized.idx +; CHECK: trunc i64 %index to i8 + +define void @reverse_forward_induction_i64_i8() { +entry: + br label %while.body + +while.body: + %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ] + %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ] + %inc = add i8 %forward_induction.05, 1 + %conv = zext i8 %inc to i32 + %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, -1 + %0 = trunc i64 %indvars.iv to i32 + %cmp = icmp sgt i32 %0, 0 + br i1 %cmp, label %while.body, label %while.end + +while.end: + ret void +} + +; CHECK: reverse_forward_induction_i64_i8_signed +; CHECK: vector.body: +; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ] +; CHECK: %normalized.idx = sub i64 %index, 129 +; CHECK: %reverse.idx = sub i64 1023, %normalized.idx +; CHECK: trunc i64 %index to i8 + +define void @reverse_forward_induction_i64_i8_signed() { +entry: + br label %while.body + +while.body: + %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ] + %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ] + %inc = add i8 %forward_induction.05, 1 + %conv = sext i8 %inc to i32 + %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv + store i32 %conv, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv, -1 + %0 = trunc i64 %indvars.iv to i32 + %cmp = icmp sgt i32 %0, 0 + br i1 %cmp, label %while.body, label %while.end + +while.end: + ret void +} |