diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2013-11-01 14:09:50 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2013-11-01 14:09:50 +0000 |
commit | 7208b0763c9fca2c6364c642d28fd014a496cc37 (patch) | |
tree | 1dc0c78f1ac0c28e9bff57c26e8cdb698961b489 | |
parent | 6186de5c54b580414b2d162e0f335b62b3d9812c (diff) | |
download | external_llvm-7208b0763c9fca2c6364c642d28fd014a496cc37.zip external_llvm-7208b0763c9fca2c6364c642d28fd014a496cc37.tar.gz external_llvm-7208b0763c9fca2c6364c642d28fd014a496cc37.tar.bz2 |
LoopVectorize: Look for consecutive acces in GEPs with trailing zero indices
If we have a pointer to a single-element struct we can still build wide loads
and stores to it (if there is no padding).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193860 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 49 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/struct_access.ll | 42 |
2 files changed, 80 insertions, 11 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f18707c..ee94173 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1069,6 +1069,31 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx, return Builder.CreateAdd(Val, Cv, "induction"); } +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +static unsigned getGEPInductionOperand(DataLayout *DL, + const GetElementPtrInst *Gep) { + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL->getTypeAllocSize( + cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL->getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); // Make sure that the pointer does not point to structs. @@ -1090,8 +1115,6 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return 0; unsigned NumOperands = Gep->getNumOperands(); - Value *LastIndex = Gep->getOperand(NumOperands - 1); - Value *GpPtr = Gep->getPointerOperand(); // If this GEP value is a consecutive pointer induction variable and all of // the indices are constant then we know it is consecutive. We can @@ -1115,14 +1138,18 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { return -1; } - // Check that all of the gep indices are uniform except for the last. - for (unsigned i = 0; i < NumOperands - 1; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0; i != NumOperands; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) return 0; - // We can emit wide load/stores only if the last index is the induction - // variable. - const SCEV *Last = SE->getSCEV(LastIndex); + // We can emit wide load/stores only if the last non-zero index is the + // induction variable. + const SCEV *Last = SE->getSCEV(Gep->getOperand(InductionOperand)); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) { const SCEV *Step = AR->getStepRecurrence(*SE); @@ -1219,7 +1246,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; unsigned NumOperands = Gep->getNumOperands(); - unsigned LastOperand = NumOperands - 1; + unsigned InductionOperand = getGEPInductionOperand(DL, Gep); // Create the new GEP with the new induction variable. GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); @@ -1228,9 +1255,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr, Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand); // Update last index or loop invariant instruction anchored in loop. - if (i == LastOperand || + if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { - assert((i == LastOperand || + assert((i == InductionOperand || SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && "Must be last index or loop invariant"); diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll index 0cfaabe..75beae8 100644 --- a/test/Transforms/LoopVectorize/struct_access.ll +++ b/test/Transforms/LoopVectorize/struct_access.ll @@ -44,3 +44,45 @@ for.end: ; preds = %for.body, %entry %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ret i32 %sum.0.lcssa } + +%struct.lit = type { i32 } + +; Verify that we still vectorize the access if the struct has the same size as +; the loaded element. +; struct lit { +; int x; +; }; +; +; +; int bar(struct lit *A, int n) { +; +; int sum = 0; +; for (int i = 0; i < n; ++i) +; sum += A[i].x; +; +; return sum; +; } + +;CHECK-LABEL: @bar( +;CHECK: load <4 x i32> +;CHECK: ret +define i32 @bar(%struct.lit* nocapture %A, i32 %n) nounwind uwtable readonly ssp { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %x = getelementptr inbounds %struct.lit* %A, i64 %indvars.iv, i32 0 + %0 = load i32* %x, align 4 + %add = add nsw i32 %0, %sum.05 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %sum.0.lcssa +} |