author     Hongbin Zheng <etherzhhb@gmail.com>  2012-04-05 15:46:55 +0000
committer  Hongbin Zheng <etherzhhb@gmail.com>  2012-04-05 15:46:55 +0000
commit     bef377b7d7ce31edb40c87f8786d1b7bb6cdd6b1 (patch)
tree       a730cfaadaf3916d0e0d3ee6ffa45489887e71de
parent     17dcaf5ef9e761bd3b516a3f4cb85a9fdcb5975e (diff)
Introduce the VectorizeConfig class, with which we can control the behavior
of the BBVectorizePass without using command line options. As pointed out by
Hal, we can ask the TargetLoweringInfo for an architecture-specific
VectorizeConfig to perform vectorization with architecture-specific
information.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154096 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  include/llvm/Transforms/Vectorize.h        68
-rw-r--r--  lib/Transforms/Vectorize/BBVectorize.cpp   92
2 files changed, 126 insertions(+), 34 deletions(-)
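As a quick illustration of the new interface, the snippet below builds a
VectorizeConfig by hand and hands it to the pass, instead of relying on the
-bb-vectorize-* command line flags. This is a minimal sketch, not part of the
patch; addVectorizePasses is a hypothetical helper and the field values are
illustrative only:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Vectorize.h"

    using namespace llvm;

    // Hypothetical helper: configure BBVectorize programmatically.
    static void addVectorizePasses(PassManager &PM) {
      VectorizeConfig C;     // defaults are read from the command line options
      C.VectorBits = 256;    // suppose the target has 256-bit vector registers
      C.AlignedOnly = true;  // only emit aligned loads and stores
      PM.add(createBBVectorizePass(C));
    }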
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index ad06937..6691258 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -20,10 +20,73 @@ class BasicBlock;
class BasicBlockPass;
//===----------------------------------------------------------------------===//
+/// @brief Vectorize configuration.
+struct VectorizeConfig {
+ //===--------------------------------------------------------------------===//
+ // Target architecture related parameters
+
+ /// @brief The size of the native vector registers.
+ unsigned VectorBits;
+
+ /// @brief Don't try to vectorize integer values.
+ bool NoInts;
+
+ /// @brief Don't try to vectorize floating-point values.
+ bool NoFloats;
+
+ /// @brief Don't try to vectorize casting (conversion) operations.
+ bool NoCasts;
+
+ /// @brief Don't try to vectorize floating-point math intrinsics.
+ bool NoMath;
+
+ /// @brief Don't try to vectorize the fused-multiply-add intrinsic.
+ bool NoFMA;
+
+ /// @brief Don't try to vectorize loads and stores.
+ bool NoMemOps;
+
+ /// @brief Only generate aligned loads and stores.
+ bool AlignedOnly;
+
+ //===--------------------------------------------------------------------===//
+ // Misc parameters
+
+ /// @brief The required chain depth for vectorization.
+ unsigned ReqChainDepth;
+
+ /// @brief The maximum search distance for instruction pairs.
+ unsigned SearchLimit;
+
+ /// @brief The maximum number of candidate pairs with which to use a full
+ /// cycle check.
+ unsigned MaxCandPairsForCycleCheck;
+
+ /// @brief Replicating one element to a pair breaks the chain.
+ bool SplatBreaksChain;
+
+ /// @brief The maximum number of pairable instructions per group.
+ unsigned MaxInsts;
+
+ /// @brief The maximum number of pairing iterations.
+ unsigned MaxIter;
+
+ /// @brief Don't boost the chain-depth contribution of loads and stores.
+ bool NoMemOpBoost;
+
+ /// @brief Use a fast instruction dependency analysis.
+ bool FastDep;
+
+ /// @brief Initialize the VectorizeConfig from command line options.
+ VectorizeConfig();
+};
+
+//===----------------------------------------------------------------------===//
//
// BBVectorize - A basic-block vectorization pass.
//
-BasicBlockPass *createBBVectorizePass();
+BasicBlockPass *
+createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//===----------------------------------------------------------------------===//
/// @brief Vectorize the BasicBlock.
@@ -35,7 +98,8 @@ BasicBlockPass *createBBVectorizePass();
///
/// @return True if the BB is changed, false otherwise.
///
-bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
+bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
+ const VectorizeConfig &C = VectorizeConfig());
} // End llvm namespace
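Similarly, the extended vectorizeBasicBlock entry point lets another pass run
the vectorizer on demand with a custom config. A rough sketch, assuming the
calling pass requires AliasAnalysis and ScalarEvolution (the utility
constructor in the .cpp diff below fetches both from P):

    // Hypothetical runOnFunction of a pass that declares AliasAnalysis and
    // ScalarEvolution in its getAnalysisUsage().
    virtual bool runOnFunction(Function &F) {
      VectorizeConfig C;
      C.MaxInsts = 500;  // illustrative: cap pairable instructions per group
      bool Changed = false;
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        Changed |= vectorizeBasicBlock(this, *BB, C);
      return Changed;
    }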
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 5abb242..7d5bb31 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -140,11 +140,16 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
namespace {
struct BBVectorize : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- BBVectorize() : BasicBlockPass(ID) {
+
+ VectorizeConfig Config;
+
+ BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+ : BasicBlockPass(ID), Config(C) {
initializeBBVectorizePass(*PassRegistry::getPassRegistry());
}
- BBVectorize(Pass *P) : BasicBlockPass(ID) {
+ BBVectorize(Pass *P, const VectorizeConfig &C)
+ : BasicBlockPass(ID), Config(C) {
AA = &P->getAnalysis<AliasAnalysis>();
SE = &P->getAnalysis<ScalarEvolution>();
TD = P->getAnalysisIfAvailable<TargetData>();
@@ -291,9 +296,10 @@ namespace {
// Iterate a sufficient number of times to merge types of size 1 bit,
// then 2 bits, then 4, etc. up to half of the target vector width of the
// target vector register.
- for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
+ for (unsigned v = 2, n = 1;
+ v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
v *= 2, ++n) {
- DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+ DEBUG(dbgs() << "BBV: fusing loop #" << n <<
" for " << BB.getName() << " in " <<
BB.getParent()->getName() << "...\n");
if (vectorizePairs(BB))
@@ -343,7 +349,7 @@ namespace {
// candidate chains where longer chains are considered to be better.
// Note: when this function returns 0, the resulting instructions are
// not actually fused.
- static inline size_t getDepthFactor(Value *V) {
+ inline size_t getDepthFactor(Value *V) {
// InsertElement and ExtractElement have a depth factor of zero. This is
// for two reasons: First, they cannot be usefully fused. Second, because
// the pass generates a lot of these, they can confuse the simple metric
@@ -357,8 +363,8 @@ namespace {
// Give a load or store half of the required depth so that load/store
// pairs will vectorize.
- if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
- return ReqChainDepth/2;
+ if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+ return Config.ReqChainDepth/2;
return 1;
}
@@ -431,9 +437,9 @@ namespace {
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::pow:
- return !NoMath;
+ return !Config.NoMath;
case Intrinsic::fma:
- return !NoFMA;
+ return !Config.NoFMA;
}
}
@@ -527,16 +533,16 @@ namespace {
} else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Vectorize simple loads if possible:
IsSimpleLoadStore = L->isSimple();
- if (!IsSimpleLoadStore || NoMemOps)
+ if (!IsSimpleLoadStore || Config.NoMemOps)
return false;
} else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
// Vectorize simple stores if possible:
IsSimpleLoadStore = S->isSimple();
- if (!IsSimpleLoadStore || NoMemOps)
+ if (!IsSimpleLoadStore || Config.NoMemOps)
return false;
} else if (CastInst *C = dyn_cast<CastInst>(I)) {
// We can vectorize casts, but not casts of pointer types, etc.
- if (NoCasts)
+ if (Config.NoCasts)
return false;
Type *SrcTy = C->getSrcTy();
@@ -576,14 +582,14 @@ namespace {
!(VectorType::isValidElementType(T2) || T2->isVectorTy()))
return false;
- if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+ if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
return false;
- if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+ if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
return false;
- if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
- T2->getPrimitiveSizeInBits() > VectorBits/2)
+ if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+ T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
return false;
return true;
@@ -611,7 +617,7 @@ namespace {
LI->isVolatile() != LJ->isVolatile() ||
LI->getOrdering() != LJ->getOrdering() ||
LI->getSynchScope() != LJ->getSynchScope())
- return false;
+ return false;
} else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
if (SI->getValueOperand()->getType() !=
SJ->getValueOperand()->getType() ||
@@ -632,7 +638,7 @@ namespace {
int64_t OffsetInElmts = 0;
if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
OffsetInElmts) && abs64(OffsetInElmts) == 1) {
- if (AlignedOnly) {
+ if (Config.AlignedOnly) {
Type *aType = isa<StoreInst>(I) ?
cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
// An aligned load or store is possible only if the instruction
@@ -753,12 +759,12 @@ namespace {
AliasSetTracker WriteSet(*AA);
bool JAfterStart = IAfterStart;
BasicBlock::iterator J = llvm::next(I);
- for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
+ for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
if (J == Start) JAfterStart = true;
// Determine if J uses I, if so, exit the loop.
- bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
- if (FastDep) {
+ bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ if (Config.FastDep) {
// Note: For this heuristic to be effective, independent operations
// must tend to be intermixed. This is likely to be true from some
// kinds of grouped loop unrolling (but not the generic LLVM pass),
@@ -796,7 +802,7 @@ namespace {
// If we have already found too many pairs, break here and this function
// will be called again starting after the last instruction selected
// during this invocation.
- if (PairableInsts.size() >= MaxInsts) {
+ if (PairableInsts.size() >= Config.MaxInsts) {
ShouldContinue = true;
break;
}
@@ -841,7 +847,7 @@ namespace {
ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
}
- if (SplatBreaksChain) continue;
+ if (Config.SplatBreaksChain) continue;
// Look for cases where just the first value in the pair is used by
// both members of another pair (splatting).
for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
@@ -850,7 +856,7 @@ namespace {
}
}
- if (SplatBreaksChain) return;
+ if (Config.SplatBreaksChain) return;
// Look for cases where just the second value in the pair is used by
// both members of another pair (splatting).
for (Value::use_iterator I = P.second->use_begin(),
@@ -1280,7 +1286,7 @@ namespace {
<< *J->first << " <-> " << *J->second << "} of depth " <<
MaxDepth << " and size " << PrunedTree.size() <<
" (effective size: " << EffSize << ")\n");
- if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
+ if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth;
BestEffSize = EffSize;
BestTree = PrunedTree;
@@ -1296,7 +1302,8 @@ namespace {
std::multimap<ValuePair, ValuePair> &ConnectedPairs,
DenseSet<ValuePair> &PairableInstUsers,
DenseMap<Value *, Value *>& ChosenPairs) {
- bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
+ bool UseCycleCheck =
+ CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
std::multimap<ValuePair, ValuePair> PairableInstUserMap;
for (std::vector<Value *>::iterator I = PairableInsts.begin(),
E = PairableInsts.end(); I != E; ++I) {
@@ -1547,11 +1554,11 @@ namespace {
unsigned IID = F->getIntrinsicID();
if (o == NumOperands-1) {
BasicBlock &BB = *I->getParent();
-
+
Module *M = BB.getParent()->getParent();
Type *ArgType = I->getType();
Type *VArgType = getVecTypeForPair(ArgType);
-
+
// FIXME: is it safe to do this here?
ReplacedOperands[o] = Intrinsic::getDeclaration(M,
(Intrinsic::ID) IID, VArgType);
@@ -1867,11 +1874,32 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
-BasicBlockPass *llvm::createBBVectorizePass() {
- return new BBVectorize();
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+ return new BBVectorize(C);
}
-bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
- BBVectorize BBVectorizer(P);
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+ BBVectorize BBVectorizer(P, C);
return BBVectorizer.vectorizeBB(BB);
}
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+ VectorBits = ::VectorBits;
+ NoInts = ::NoInts;
+ NoFloats = ::NoFloats;
+ NoCasts = ::NoCasts;
+ NoMath = ::NoMath;
+ NoFMA = ::NoFMA;
+ NoMemOps = ::NoMemOps;
+ AlignedOnly = ::AlignedOnly;
+ ReqChainDepth = ::ReqChainDepth;
+ SearchLimit = ::SearchLimit;
+ MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+ SplatBreaksChain = ::SplatBreaksChain;
+ MaxInsts = ::MaxInsts;
+ MaxIter = ::MaxIter;
+ NoMemOpBoost = ::NoMemOpBoost;
+ FastDep = ::FastDep;
+}
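Note that this constructor simply snapshots the command line option globals in
BBVectorize.cpp, so a default-constructed VectorizeConfig still honors the
-bb-vectorize-* flags, and clients only need to override the fields they care
about. The architecture-specific configuration mentioned in the commit message
is left as future work; one purely hypothetical shape for it (neither hook
below exists in this revision) might be:

    // Hypothetical follow-up, not in this commit: let the target supply its
    // preferred vectorizer configuration.
    VectorizeConfig C = TLI->getVectorizeConfig();  // hypothetical TLI hook
    C.NoFMA = !TLI->hasFMA();                       // hypothetical FMA query
    PM.add(createBBVectorizePass(C));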