author     Arnold Schwaighofer <aschwaighofer@apple.com>   2013-07-12 19:16:02 +0000
committer  Arnold Schwaighofer <aschwaighofer@apple.com>   2013-07-12 19:16:02 +0000
commit     c0a11edba6ea46c782672ab3fb4e4ab3dc267a22 (patch)
tree       dfd8a80460e3764c602d6e8df3e60176dab3f136
parent     ec1170615515081135ea324e545b57c9503cd574 (diff)
TargetTransformInfo: address calculation parameter for gather/scatter
Address calculation for gather/scatter in vectorized code can incur a
significant cost, making vectorization unprofitable. Add the
infrastructure to model that cost; tests and cost models for individual
targets will come in follow-up commits.

radar://14351991

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186187 91177308-0d34-0410-b5e6-96231b3b80d8
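In outline, the patch threads a single boolean hint from the loop
vectorizer's cost model into the TTI cost interface. A minimal sketch of
the consumer side, using the names from the LoopVectorize hunk below
(surrounding code elided):

  // Decide whether the address math for a non-consecutive access is
  // likely too complex to fold into the addressing mode ...
  bool IsComplexComputation =
      isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
  // ... and let the target price each scalarized address accordingly.
  Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);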
-rw-r--r--  include/llvm/Analysis/TargetTransformInfo.h |  6
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp         |  7
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp     |  4
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp    |  4
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp   | 57
5 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index eb29e34..b8a44b5 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -339,7 +339,11 @@ public:
/// merged into the instruction indexing mode. Some targets might want to
/// distinguish between address computation for memory operations on vector
/// types and scalar types. Such targets should override this function.
- virtual unsigned getAddressComputationCost(Type *Ty) const;
+ /// The 'IsComplex' parameter is a hint that the address computation is likely
+ /// to involve multiple instructions and as such unlikely to be merged into
+ /// the address indexing mode.
+ virtual unsigned getAddressComputationCost(Type *Ty,
+ bool IsComplex = false) const;
/// @}
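A target that wants to act on the hint overrides this hook. The sketch
below is illustrative only; the commit deliberately leaves per-target
cost models to follow-ups, and the class name and constants here are
assumptions, not part of this change:

  // Hypothetical target implementation (class declaration elided; it
  // mirrors the virtual declared in the header above). Numbers are
  // illustrative.
  unsigned MyTargetTTI::getAddressComputationCost(Type *Ty,
                                                  bool IsComplex) const {
    // A complex address (e.g. one scalarized out of a gather/scatter)
    // is unlikely to fold into the addressing mode, so charge for the
    // extra address arithmetic.
    return IsComplex ? 10 : 1;
  }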
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 35ce794..4b2bf3c 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -206,8 +206,9 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return PrevTTI->getNumberOfParts(Tp);
}
-unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const {
- return PrevTTI->getAddressComputationCost(Tp);
+unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
+ bool IsComplex) const {
+ return PrevTTI->getAddressComputationCost(Tp, IsComplex);
}
namespace {
@@ -559,7 +560,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return 0;
}
- unsigned getAddressComputationCost(Type *Tp) const {
+ unsigned getAddressComputationCost(Type *Tp, bool) const {
return 0;
}
};
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 19ace64..3755320 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -108,7 +108,7 @@ public:
virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
ArrayRef<Type*> Tys) const;
virtual unsigned getNumberOfParts(Type *Tp) const;
- virtual unsigned getAddressComputationCost(Type *Ty) const;
+ virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const;
/// @}
};
@@ -489,6 +489,6 @@ unsigned BasicTTI::getNumberOfParts(Type *Tp) const {
return LT.first;
}
-unsigned BasicTTI::getAddressComputationCost(Type *Ty) const {
+unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
return 0;
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 53ece66..79f56a4 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -124,7 +124,7 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
- unsigned getAddressComputationCost(Type *Val) const;
+ unsigned getAddressComputationCost(Type *Val, bool IsComplex) const;
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
OperandValueKind Op1Info = OK_AnyValue,
@@ -425,7 +425,7 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
}
-unsigned ARMTTI::getAddressComputationCost(Type *Ty) const {
+unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
// In many cases the address computation is not merged into the instruction
// addressing mode.
return 1;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 020eb61..b35ed74 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4403,6 +4403,59 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
+/// \brief Check whether the address computation for a non-consecutive memory
+/// access looks like an unlikely candidate for being merged into the indexing
+/// mode.
+///
+/// We look for a GEP which has one index that is an induction variable and all
+/// other indices are loop invariant. If the stride of this access is also
+/// within a small bound we decide that this address computation can likely be
+/// merged into the addressing mode.
+/// In all other cases, we identify the address computation as complex.
+static bool isLikelyComplexAddressComputation(Value *Ptr,
+ LoopVectorizationLegality *Legal,
+ ScalarEvolution *SE,
+ const Loop *TheLoop) {
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!Gep)
+ return true;
+
+ // We are looking for a gep with all loop invariant indices except for one
+ // which should be an induction variable.
+ unsigned NumOperands = Gep->getNumOperands();
+ for (unsigned i = 1; i < NumOperands; ++i) {
+ Value *Opd = Gep->getOperand(i);
+ if (!SE->isLoopInvariant(SE->getSCEV(Opd), TheLoop) &&
+ !Legal->isInductionVariable(Opd))
+ return true;
+ }
+
+ // Now we know we have a GEP ptr, %inv, %ind, %inv. Make sure that the step
+ // can likely be merged into the address computation.
+ unsigned MaxMergeDistance = 64;
+
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Ptr));
+ if (!AddRec)
+ return true;
+
+ // Check the step is constant.
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ // Calculate the pointer stride and check if it is consecutive.
+ const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
+ if (!C)
+ return true;
+
+ const APInt &APStepVal = C->getValue()->getValue();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return true;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+
+ return StepVal > MaxMergeDistance;
+}
+
unsigned
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// If we know that this instruction will remain uniform, check the cost of
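For intuition, two small C++ loops (illustrative assumptions, not part
of this commit) and how the heuristic above classifies them once the
vectorizer reaches the non-consecutive path:

  // Non-consecutive but regular: the GEP's only varying index is the
  // induction variable and the constant step (sizeof(Point) == 8) is
  // within MaxMergeDistance, so the address is NOT considered complex.
  struct Point { int x, y; };
  int sumX(const Point *P, int N) {
    int S = 0;
    for (int i = 0; i < N; ++i)
      S += P[i].x;
    return S;
  }

  // Gather-like: the index B[i] is neither loop invariant nor an
  // induction variable, so the function bails out early and the
  // address computation is priced as complex.
  int sumIndirect(const int *A, const int *B, int N) {
    int S = 0;
    for (int i = 0; i < N; ++i)
      S += A[B[i]];
    return S;
  }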
@@ -4498,6 +4551,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ValTy);
unsigned VectorElementSize = DL->getTypeStoreSize(VectorTy)/VF;
if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
+ bool IsComplexComputation =
+ isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
unsigned Cost = 0;
// The cost of extracting from the value vector and pointer vector.
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
@@ -4513,7 +4568,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
}
// The cost of the scalar loads/stores.
- Cost += VF * TTI.getAddressComputationCost(ValTy->getScalarType());
+ Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
return Cost;
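As a worked example of the scalarized branch above (with illustrative,
not measured, costs): at VF = 4, a target charging 1 per simple address
computation adds 4 * 1 = 4 to the access, while one charging 10 for a
complex computation adds 4 * 10 = 40 before the memory-op and
extract/insert costs are counted; that gap is what can swing
expectedCost() against vectorizing a gather-heavy loop.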