aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp373
1 files changed, 308 insertions, 65 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 34d8a10..cb8a41d 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -209,6 +209,29 @@ namespace {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
+/// Optimization analysis message produced during vectorization. Messages inform
+/// the user why vectorization did not occur.
+class Report {
+ std::string Message;
+ raw_string_ostream Out;
+ Instruction *Instr;
+
+public:
+ Report(Instruction *I = nullptr) : Out(Message), Instr(I) {
+ Out << "loop not vectorized: ";
+ }
+
+ template <typename A> Report &operator<<(const A &Value) {
+ Out << Value;
+ return *this;
+ }
+
+ Instruction *getInstr() { return Instr; }
+
+ std::string &str() { return Out.str(); }
+ operator Twine() { return Out.str(); }
+};
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -515,10 +538,12 @@ public:
unsigned NumPredStores;
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI)
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ Function *F)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr),
- HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {}
+ DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
+ }
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -747,6 +772,16 @@ private:
/// invariant.
void collectStridedAcccess(Value *LoadOrStoreInst);
+ /// Report an analysis message to assist the user in diagnosing loops that are
+ /// not vectorized.
+ void emitAnalysis(Report &Message) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
+ *TheFunction, DL, Message.str());
+ }
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -757,6 +792,8 @@ private:
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
+ /// Parent function
+ Function *TheFunction;
// --- vectorization state --- //
@@ -906,7 +943,7 @@ public:
}
/// Return the loop vectorizer metadata prefix.
- static StringRef Prefix() { return "llvm.vectorizer."; }
+ static StringRef Prefix() { return "llvm.loop.vectorize."; }
MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const {
SmallVector<Value*, 2> Vals;
@@ -942,6 +979,29 @@ public:
LoopID = NewLoopID;
}
+ std::string emitRemark() const {
+ Report R;
+ R << "vectorization ";
+ switch (Force) {
+ case LoopVectorizeHints::FK_Disabled:
+ R << "is explicitly disabled";
+ break;
+ case LoopVectorizeHints::FK_Enabled:
+ R << "is explicitly enabled";
+ if (Width != 0 && Unroll != 0)
+ R << " with width " << Width << " and interleave count " << Unroll;
+ else if (Width != 0)
+ R << " with width " << Width;
+ else if (Unroll != 0)
+ R << " with interleave count " << Unroll;
+ break;
+ case LoopVectorizeHints::FK_Undefined:
+ R << "was not specified";
+ break;
+ }
+ return R.str();
+ }
+
unsigned getWidth() const { return Width; }
unsigned getUnroll() const { return Unroll; }
enum ForceKind getForce() const { return Force; }
@@ -1125,18 +1185,37 @@ struct LoopVectorize : public FunctionPass {
: "?")) << " width=" << Hints.getWidth()
<< " unroll=" << Hints.getUnroll() << "\n");
+ // Function containing loop
+ Function *F = L->getHeader()->getParent();
+
+ // Looking at the diagnostic output is the only way to determine if a loop
+ // was vectorized (other than looking at the IR or machine code), so it
+ // is important to generate an optimization remark for each loop. Most of
+ // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
+ // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
+ // less verbose reporting vectorized loops and unvectorized loops that may
+ // benefit from vectorization, respectively.
+
if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized: vector width and interleave count are "
+ "explicitly set to 1");
return false;
}
@@ -1151,14 +1230,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "vectorization is not beneficial and is not explicitly forced");
return false;
}
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1167,7 +1251,6 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
- Function *F = L->getHeader()->getParent();
bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
F->hasFnAttribute(Attribute::OptimizeForSize);
@@ -1190,6 +1273,11 @@ struct LoopVectorize : public FunctionPass {
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized due to NoImplicitFloat attribute");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1208,9 +1296,14 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- if (UF == 1)
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
+
+ if (UF == 1) {
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "not beneficial to vectorize and user disabled interleaving");
return false;
+ }
DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
// Report the unrolling decision.
@@ -1220,6 +1313,7 @@ struct LoopVectorize : public FunctionPass {
" (vectorization not beneficial)"));
// We decided not to vectorize, but we may want to unroll.
+
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
} else {
@@ -1909,20 +2003,23 @@ void InnerLoopVectorizer::createEmptyLoop() {
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- vector loop bypass (may consist of multiple blocks).
- / |
- / v
- | [ ] <-- vector pre header.
- | |
- | v
- | [ ] \
- | [ ]_| <-- vector loop.
- | |
- \ v
- >[ ] <--- middle-block.
- / |
- / v
- | [ ] <--- new preheader.
+ [ ] <-- Back-edge taken count overflow check.
+ / |
+ / v
+ | [ ] <-- vector loop bypass (may consist of multiple blocks).
+ | / |
+ | / v
+ || [ ] <-- vector pre header.
+ || |
+ || v
+ || [ ] \
+ || [ ]_| <-- vector loop.
+ || |
+ | \ v
+ | >[ ] <--- middle-block.
+ | / |
+ | / v
+ -|- >[ ] <--- new preheader.
| |
| v
| [ ] \
@@ -1936,6 +2033,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(BypassBlock && "Invalid loop structure");
assert(ExitBlock && "Must have an exit block");
// Some loops have a single integer induction variable, while other loops
@@ -1958,18 +2056,30 @@ void InnerLoopVectorizer::createEmptyLoop() {
IdxTy->getPrimitiveSizeInBits())
ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
- ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
+ const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(ExitCount->getType(), 1));
+ ExitCount = SE->getAddExpr(BackedgeTakeCount,
+ SE->getConstant(BackedgeTakeCount->getType(), 1));
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
SCEVExpander Exp(*SE, "induction");
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- BypassBlock->getTerminator());
+ // We need to test whether the backedge-taken count is uint##_max. Adding one
+ // to it will cause overflow and an incorrect loop trip count in the vector
+ // body. In case of overflow we want to directly jump to the scalar remainder
+ // loop.
+ Value *BackedgeCount =
+ Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
+ BypassBlock->getTerminator());
+ if (BackedgeCount->getType()->isPointerTy())
+ BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
+ "backedge.ptrcnt.to.int",
+ BypassBlock->getTerminator());
+ Instruction *CheckBCOverflow =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
+ Constant::getAllOnesValue(BackedgeCount->getType()),
+ "backedge.overflow", BypassBlock->getTerminator());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
@@ -1980,7 +2090,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
IdxTy):
ConstantInt::get(IdxTy, 0);
- assert(BypassBlock && "Invalid loop structure");
+ // We need an instruction to anchor the overflow check on. StartIdx needs to
+ // be defined before the overflow check branch. Because the scalar preheader
+ // is going to merge the start index and so the overflow branch block needs to
+ // contain a definition of the start index.
+ Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd(
+ StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor",
+ BypassBlock->getTerminator());
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
+
LoopBypassBlocks.push_back(BypassBlock);
// Split the single block loop into the two loop structure described above.
@@ -2049,29 +2170,45 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
- "cmp.zero");
+ Value *Cmp =
+ BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
BasicBlock *LastBypassBlock = BypassBlock;
+ // Generate code to check that the loops trip count that we computed by adding
+ // one to the backedge-taken count will not overflow.
+ {
+ auto PastOverflowCheck =
+ std::next(BasicBlock::iterator(OverflowCheckAnchor));
+ BasicBlock *CheckBlock =
+ LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ LoopBypassBlocks.push_back(CheckBlock);
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
+ BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
+ OldTerm->eraseFromParent();
+ LastBypassBlock = CheckBlock;
+ }
+
// Generate the code to check that the strides we assumed to be one are really
// one. We want the new basic block to start at the first instruction in a
// sequence of instructions that form a check.
Instruction *StrideCheck;
Instruction *FirstCheckInst;
std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(BypassBlock->getTerminator());
+ addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
- BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = BypassBlock->getTerminator();
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
OldTerm->eraseFromParent();
@@ -2134,6 +2271,19 @@ void InnerLoopVectorizer::createEmptyLoop() {
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
MiddleBlock->getTerminator()) : nullptr;
+ // Create phi nodes to merge from the backedge-taken check block.
+ PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ ScalarPH->getTerminator());
+ BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
+
+ PHINode *BCTruncResumeVal = nullptr;
+ if (OrigPhi == OldInduction) {
+ BCTruncResumeVal =
+ PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
+ ScalarPH->getTerminator());
+ BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
+ }
+
Value *EndValue = nullptr;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
@@ -2150,10 +2300,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
TruncResumeVal->addIncoming(EndValue, VecBody);
+ BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
+
// We know what the end value is.
EndValue = IdxEndRoundDown;
// We also know which PHI node holds it.
@@ -2199,7 +2351,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) {
if (OrigPhi == OldInduction)
ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
else
@@ -2209,11 +2361,16 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- // The old inductions phi node in the scalar body needs the truncated value.
- if (OrigPhi == OldInduction)
- OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
- else
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+
+ // The old induction's phi node in the scalar body needs the truncated
+ // value.
+ if (OrigPhi == OldInduction) {
+ BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]);
+ OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal);
+ } else {
+ BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
+ OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
+ }
}
// If we are generating a new induction variable then we also need to
@@ -2224,7 +2381,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
assert(!ResumeIndex && "Unexpected resume value found");
ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
MiddleBlock->getTerminator());
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
}
@@ -2494,7 +2651,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -2568,7 +2725,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
NewPhi->addIncoming(RdxExitVal[part],
LoopVectorBody.back());
@@ -2626,6 +2783,13 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.getInt32(0));
}
+ // Create a phi node that merges control-flow from the backedge-taken check
+ // block and the middle block.
+ PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
+ LoopScalarPreHeader->getTerminator());
+ BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]);
+ BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
@@ -2655,7 +2819,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
@@ -3062,9 +3226,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
scalarizeInstruction(it);
break;
default:
+ bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (HasScalarOpd && i == 1) {
+ Args.push_back(CI->getArgOperand(i));
+ continue;
+ }
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
@@ -3112,8 +3281,8 @@ void InnerLoopVectorizer::updateAnalysis() {
}
}
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
- DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
+ DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
@@ -3138,8 +3307,10 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
}
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
- if (!EnableIfConversion)
+ if (!EnableIfConversion) {
+ emitAnalysis(Report() << "if-conversion is disabled");
return false;
+ }
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
@@ -3169,16 +3340,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
- if (!isa<BranchInst>(BB->getTerminator()))
+ if (!isa<BranchInst>(BB->getTerminator())) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "loop contains a switch statement");
return false;
+ }
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointes))
+ if (!blockCanBePredicated(BB, SafePointes)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
- } else if (BB != Header && !canIfConvertPHINodes(BB))
+ }
+ } else if (BB != Header && !canIfConvertPHINodes(BB)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
-
+ }
}
// We can if-convert this loop.
@@ -3188,20 +3367,31 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
bool LoopVectorizationLegality::canVectorize() {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
- if (!TheLoop->getLoopPreheader())
+ if (!TheLoop->getLoopPreheader()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We can only vectorize innermost loops.
- if (TheLoop->getSubLoopsVector().size())
+ if (TheLoop->getSubLoopsVector().size()) {
+ emitAnalysis(Report() << "loop is not the innermost loop");
return false;
+ }
// We must have a single backedge.
- if (TheLoop->getNumBackEdges() != 1)
+ if (TheLoop->getNumBackEdges() != 1) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We must have a single exiting block.
- if (!TheLoop->getExitingBlock())
+ if (!TheLoop->getExitingBlock()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We need to have a loop header.
DEBUG(dbgs() << "LV: Found a loop: " <<
@@ -3217,6 +3407,7 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(Report() << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -3310,6 +3501,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
+ emitAnalysis(Report(it)
+ << "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -3320,13 +3513,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
- if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
+ emitAnalysis(Report(it) << "value that could not be identified as "
+ "reduction is used outside the loop");
return false;
}
// We only allow if-converted PHIs with more than two incoming values.
if (Phi->getNumIncomingValues() != 2) {
+ emitAnalysis(Report(it)
+ << "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
@@ -3357,8 +3554,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
+ }
continue;
}
@@ -3401,6 +3601,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ emitAnalysis(Report(it) << "unvectorizable operation");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
@@ -3409,14 +3610,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ emitAnalysis(Report(it) << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
+ // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the
+ // second argument is the same (i.e. loop invariant)
+ if (CI &&
+ hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
+ if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+ emitAnalysis(Report(it)
+ << "intrinsic instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
+ return false;
+ }
+ }
+
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ emitAnalysis(Report(it)
+ << "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -3424,8 +3640,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that the stored type is vectorizable.
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T))
+ if (!VectorType::isValidElementType(T)) {
+ emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
return false;
+ }
if (EnableMemAccessVersioning)
collectStridedAcccess(ST);
}
@@ -3436,8 +3654,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "value cannot be used outside the loop");
return false;
+ }
} // next instr.
@@ -3445,8 +3665,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- if (Inductions.empty())
+ if (Inductions.empty()) {
+ emitAnalysis(Report()
+ << "loop induction variable could not be identified");
return false;
+ }
}
return true;
@@ -4353,8 +4576,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
continue;
LoadInst *Ld = dyn_cast<LoadInst>(it);
- if (!Ld) return false;
- if (!Ld->isSimple() && !IsAnnotatedParallel) {
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(Report(Ld)
+ << "read with atomic ordering or volatile read");
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -4367,8 +4591,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Save 'store' instructions. Abort if other instructions write to memory.
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
- if (!St) return false;
+ if (!St) {
+ emitAnalysis(Report(it) << "instruction cannot be vectorized");
+ return false;
+ }
if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(Report(St)
+ << "write with atomic ordering or volatile write");
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -4405,6 +4634,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Value* Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
+ emitAnalysis(
+ Report(ST)
+ << "write to a loop invariant address could not be vectorized");
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
}
@@ -4483,6 +4715,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
if (NeedRTCheck && !CanDoRT) {
+ emitAnalysis(Report() << "cannot identify array bounds");
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
@@ -4513,6 +4746,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ if (!CanDoRT && NumComparisons > 0)
+ emitAnalysis(Report()
+ << "cannot check memory dependencies at runtime");
+ else
+ emitAnalysis(Report()
+ << NumComparisons << " exceeds limit of "
+ << RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
return false;
@@ -4522,6 +4763,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
+ if (!CanVecMem)
+ emitAnalysis(Report() << "unsafe dependent memory operations in loop");
+
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
@@ -5774,4 +6018,3 @@ Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
return Builder.CreateAdd(Val, C, "induction");
}
-