diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-12-26 19:08:17 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-12-26 19:08:17 +0000 |
commit | 13eb1e7817be11ea84be6571dce827a77bc9640b (patch) | |
tree | ca209e4b4e90751bc936b3a5dc4bc9be739b5390 /lib/Transforms/Vectorize/LoopVectorize.cpp | |
parent | f1a26cf9df900101b9cbea42b67f7466edc7deed (diff) | |
download | external_llvm-13eb1e7817be11ea84be6571dce827a77bc9640b.zip external_llvm-13eb1e7817be11ea84be6571dce827a77bc9640b.tar.gz external_llvm-13eb1e7817be11ea84be6571dce827a77bc9640b.tar.bz2 |
LoopVectorizer: Optimize the vectorization of consecutive memory access when the iteration step is -1
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171114 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 85 |
1 files changed, 63 insertions, 22 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index b8b934a..d64295c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -202,7 +202,7 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, bool Negate) { return Builder.CreateAdd(Val, Cv, "induction"); } -bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { +int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr"); // If this value is a pointer induction variable we know it is consecutive. @@ -210,12 +210,12 @@ bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { if (Phi && Inductions.count(Phi)) { InductionInfo II = Inductions[Phi]; if (PtrInduction == II.IK) - return true; + return 1; } GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr); if (!Gep) - return false; + return 0; unsigned NumOperands = Gep->getNumOperands(); Value *LastIndex = Gep->getOperand(NumOperands - 1); @@ -223,7 +223,7 @@ bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // Check that all of the gep indices are uniform except for the last. for (unsigned i = 0; i < NumOperands - 1; ++i) if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) - return false; + return 0; // We can emit wide load/stores only if the last index is the induction // variable. @@ -234,10 +234,12 @@ bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // The memory is consecutive because the last index is consecutive // and all other indices are loop invariant. if (Step->isOne()) - return true; + return 1; + if (Step->isAllOnesValue()) + return -1; } - return false; + return 0; } bool LoopVectorizationLegality::isUniform(Value *V) { @@ -263,6 +265,17 @@ InnerLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) { return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true)); } +Value *InnerLoopVectorizer::reverseVector(Value *Vec) { + assert(Vec->getType()->isVectorTy() && "Invalid type"); + SmallVector<Constant*, 8> ShuffleMask; + for (unsigned i = 0; i < VF; ++i) + ShuffleMask.push_back(Builder.getInt32(VF - i - 1)); + + return Builder.CreateShuffleVector(Vec, UndefValue::get(Vec->getType()), + ConstantVector::get(ShuffleMask), + "reverse"); +} + void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. @@ -941,8 +954,7 @@ Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { void InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB, PhiVector *PV) { - Constant *Zero = - ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0); + Constant *Zero = Builder.getInt32(0); // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { @@ -1142,14 +1154,15 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, assert(!Legal->isUniform(Ptr) && "We do not allow storing to uniform addresses"); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); - // This store does not use GEPs. - if (!Legal->isConsecutivePtr(Ptr)) { + int Stride = Legal->isConsecutivePtr(Ptr); + bool Reverse = Stride < 0; + if (Stride == 0) { scalarizeInstruction(it); break; } + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); if (Gep) { // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -1166,8 +1179,16 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); } + + // If the address is consecutive but reversed, then the + // wide load needs to start at the last vector element. + if (Reverse) + Ptr = Builder.CreateGEP(Ptr, Builder.getInt32(1 - VF)); + Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo()); Value *Val = getVectorValue(SI->getValueOperand()); + if (Reverse) + Val = reverseVector(Val); Builder.CreateStore(Val, Ptr)->setAlignment(Alignment); break; } @@ -1177,16 +1198,17 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, Type *RetTy = VectorType::get(LI->getType(), VF); Value *Ptr = LI->getPointerOperand(); unsigned Alignment = LI->getAlignment(); - GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); // If the pointer is loop invariant or if it is non consecutive, // scalarize the load. - bool Con = Legal->isConsecutivePtr(Ptr); - if (Legal->isUniform(Ptr) || !Con) { + int Stride = Legal->isConsecutivePtr(Ptr); + bool Reverse = Stride < 0; + if (Legal->isUniform(Ptr) || Stride == 0) { scalarizeInstruction(it); break; } + GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr); if (Gep) { // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -1203,12 +1225,17 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, assert(isa<PHINode>(Ptr) && "Invalid induction ptr"); Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero); } + // If the address is consecutive but reversed, then the + // wide load needs to start at the last vector element. + if (Reverse) + Ptr = Builder.CreateGEP(Ptr, Builder.getInt32(1 - VF)); Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo()); LI = Builder.CreateLoad(Ptr); LI->setAlignment(Alignment); + // Use this vector value for all users of the load. - WidenMap[it] = LI; + WidenMap[it] = Reverse ? reverseVector(LI) : LI; break; } case Instruction::ZExt: @@ -1625,7 +1652,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { // If the address of i is unknown (for example A[B[i]]) then we may // read a few words, modify, and write a few words, and some of the // words may be written to the same address. - if (Seen.insert(Ptr) || !isConsecutivePtr(Ptr)) + if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr)) Reads.push_back(Ptr); } @@ -2094,7 +2121,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { SI->getPointerAddressSpace()); // Scalarized stores. - if (!Legal->isConsecutivePtr(SI->getPointerOperand())) { + int Stride = Legal->isConsecutivePtr(SI->getPointerOperand()); + bool Reverse = Stride < 0; + if (0 == Stride) { unsigned Cost = 0; // The cost of extracting from the value vector and pointer vector. @@ -2115,8 +2144,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { } // Wide stores. - return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(), - SI->getPointerAddressSpace()); + unsigned Cost = VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, + SI->getAlignment(), + SI->getPointerAddressSpace()); + if (Reverse) + Cost += VTTI->getShuffleCost(VectorTargetTransformInfo::Reverse, + VectorTy, 0); + return Cost; } case Instruction::Load: { LoadInst *LI = cast<LoadInst>(I); @@ -2127,7 +2161,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { LI->getPointerAddressSpace()); // Scalarized loads. - if (!Legal->isConsecutivePtr(LI->getPointerOperand())) { + int Stride = Legal->isConsecutivePtr(LI->getPointerOperand()); + bool Reverse = Stride < 0; + if (0 == Stride) { unsigned Cost = 0; Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF); @@ -2150,8 +2186,13 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { } // Wide loads. - return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(), - LI->getPointerAddressSpace()); + unsigned Cost = VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, + LI->getAlignment(), + LI->getPointerAddressSpace()); + if (Reverse) + Cost += VTTI->getShuffleCost(VectorTargetTransformInfo::Reverse, + VectorTy, 0); + return Cost; } case Instruction::ZExt: case Instruction::SExt: |