aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp90
-rw-r--r--test/Transforms/LoopVectorize/reverse_induction.ll69
2 files changed, 138 insertions, 21 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9275b2..0dd6abb1ae 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -312,6 +312,8 @@ private:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
+ /// Holds the extended (to the widest induction type) start index.
+ Value *ExtendedIdx;
/// Maps scalars to widened vectors.
ValueMap WidenMap;
};
@@ -335,7 +337,7 @@ public:
DominatorTree *DT, TargetTransformInfo* TTI,
AliasAnalysis *AA, TargetLibraryInfo *TLI)
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
- Induction(0), HasFunNoNaNAttr(false) {}
+ Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -473,6 +475,9 @@ public:
/// Returns the induction variables found in the loop.
InductionList *getInductionVars() { return &Inductions; }
+ /// Returns the widest induction type.
+ Type *getWidestInductionType() { return WidestIndTy; }
+
/// Returns True if V is an induction variable in this loop.
bool isInductionVariable(const Value *V);
@@ -579,6 +584,8 @@ private:
/// Notice that inductions don't need to start at zero and that induction
/// variables can be pointers.
InductionList Inductions;
+ /// Holds the widest induction type encountered.
+ Type *WidestIndTy;
/// Allowed outside users. This holds the reduction
/// vars which can be accessed from outside the loop.
@@ -1243,8 +1250,7 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// induction variables. In the code below we also support a case where we
// don't have a single induction variable.
OldInduction = Legal->getInduction();
- Type *IdxTy = OldInduction ? OldInduction->getType() :
- DL->getIntPtrType(SE->getContext());
+ Type *IdxTy = Legal->getWidestInductionType();
// Find the loop boundaries.
const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
@@ -1265,9 +1271,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
// then we know that it starts at zero.
- Value *StartIdx = OldInduction ?
- OldInduction->getIncomingValueForBlock(BypassBlock):
- ConstantInt::get(IdxTy, 0);
+ Builder.SetInsertPoint(BypassBlock->getTerminator());
+ Value *StartIdx = ExtendedIdx = OldInduction ?
+ Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock),
+ IdxTy):
+ ConstantInt::get(IdxTy, 0);
assert(BypassBlock && "Invalid loop structure");
LoopBypassBlocks.push_back(BypassBlock);
@@ -1366,8 +1374,16 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
for (I = List->begin(), E = List->end(); I != E; ++I) {
PHINode *OrigPhi = I->first;
LoopVectorizationLegality::InductionInfo II = I->second;
- PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
+
+ Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType();
+ PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val",
MiddleBlock->getTerminator());
+ // We might have extended the type of the induction variable but we need a
+ // truncated version for the scalar loop.
+ PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
+ PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
+ MiddleBlock->getTerminator()) : 0;
+
Value *EndValue = 0;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
@@ -1376,6 +1392,17 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// Handle the integer induction counter:
assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
assert(OrigPhi == OldInduction && "Unknown integer PHI");
+ if (OrigPhi == OldInduction) {
+ // Create a truncated version of the resume value for the scalar loop,
+ // we might have promoted the type to a larger width.
+ EndValue =
+ BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
+ // The new PHI merges the original incoming value, in case of a bypass,
+ // or the value at the end of the vectorized loop.
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ TruncResumeVal->addIncoming(EndValue, VecBody);
+ }
// We know what the end value is.
EndValue = IdxEndRoundDown;
// We also know which PHI node holds it.
@@ -1412,13 +1439,21 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
- ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ if (OrigPhi == OldInduction)
+ ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
+ else
+ ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
+ }
ResumeVal->addIncoming(EndValue, VecBody);
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+ // The old inductions phi node in the scalar body needs the truncated value.
+ if (OrigPhi == OldInduction)
+ OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
+ else
+ OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
}
// If we are generating a new induction variable then we also need to
@@ -2022,7 +2057,9 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
llvm_unreachable("Unknown induction");
case LoopVectorizationLegality::IK_IntInduction: {
assert(P == OldInduction && "Unexpected PHI");
- Value *Broadcasted = getBroadcastInstrs(Induction);
+ // We might have had to extend the type.
+ Value *Trunc = Builder.CreateTrunc(Induction, P->getType());
+ Value *Broadcasted = getBroadcastInstrs(Trunc);
// After broadcasting the induction variable we need to make the
// vector consecutive by adding 0, 1, 2 ...
for (unsigned part = 0; part < UF; ++part)
@@ -2033,16 +2070,7 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
- Value *StartIdx = 0;
- // If we have a single integer induction variable then use it.
- // Otherwise, start counting at zero.
- if (OldInduction) {
- LoopVectorizationLegality::InductionInfo OldII =
- Legal->getInductionVars()->lookup(OldInduction);
- StartIdx = OldII.StartValue;
- } else {
- StartIdx = ConstantInt::get(Induction->getType(), 0);
- }
+ Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");
@@ -2362,6 +2390,20 @@ bool LoopVectorizationLegality::canVectorize() {
return true;
}
+static Type *convertPointerToIntegerType(DataLayout &DL, Type *Ty) {
+ if (Ty->isPointerTy())
+ return DL.getIntPtrType(Ty->getContext());
+ return Ty;
+}
+
+static Type* getWiderType(DataLayout &DL, Type *Ty0, Type *Ty1) {
+ Ty0 = convertPointerToIntegerType(DL, Ty0);
+ Ty1 = convertPointerToIntegerType(DL, Ty1);
+ if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
+ return Ty0;
+ return Ty1;
+}
+
bool LoopVectorizationLegality::canVectorizeInstrs() {
BasicBlock *PreHeader = TheLoop->getLoopPreheader();
BasicBlock *Header = TheLoop->getHeader();
@@ -2416,6 +2458,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
InductionKind IK = isInductionVariable(Phi);
if (IK_NoInduction != IK) {
+ // Get the widest type.
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(*DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
+
// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
if (Induction) {
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index f43f02b..9e8c1b1 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -77,3 +77,72 @@ loopend:
}
+@a = common global [1024 x i32] zeroinitializer, align 16
+
+; We incorrectly transformed this loop into an empty one because we left the
+; induction variable in i8 type and truncated the exit value 1024 to 0.
+; int a[1024];
+;
+; void fail() {
+; int reverse_induction = 1023;
+; unsigned char forward_induction = 0;
+; while ((reverse_induction) >= 0) {
+; forward_induction++;
+; a[reverse_induction] = forward_induction;
+; --reverse_induction;
+; }
+; }
+
+; CHECK: reverse_forward_induction_i64_i8
+; CHECK: vector.body
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %normalized.idx = sub i64 %index, 0
+; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK: trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8() {
+entry:
+ br label %while.body
+
+while.body:
+ %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+ %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]
+ %inc = add i8 %forward_induction.05, 1
+ %conv = zext i8 %inc to i32
+ %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %0 = trunc i64 %indvars.iv to i32
+ %cmp = icmp sgt i32 %0, 0
+ br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+ ret void
+}
+
+; CHECK: reverse_forward_induction_i64_i8_signed
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %normalized.idx = sub i64 %index, 129
+; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK: trunc i64 %index to i8
+
+define void @reverse_forward_induction_i64_i8_signed() {
+entry:
+ br label %while.body
+
+while.body:
+ %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
+ %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]
+ %inc = add i8 %forward_induction.05, 1
+ %conv = sext i8 %inc to i32
+ %arrayidx = getelementptr inbounds [1024 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %conv, i32* %arrayidx, align 4
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %0 = trunc i64 %indvars.iv to i32
+ %cmp = icmp sgt i32 %0, 0
+ br i1 %cmp, label %while.body, label %while.end
+
+while.end:
+ ret void
+}