aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms
diff options
context:
space:
mode:
authorHal Finkel <hfinkel@anl.gov>2012-11-01 21:50:12 +0000
committerHal Finkel <hfinkel@anl.gov>2012-11-01 21:50:12 +0000
commit78fd353d5e5daedc47ecc31b6193ca48793c249c (patch)
tree281e57ed39b5d3e3e57985640866d89fa075fa39 /lib/Transforms
parent5fc8c7cb8571f99b69264aeba48c45eed1c69f6a (diff)
downloadexternal_llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.zip
external_llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.tar.gz
external_llvm-78fd353d5e5daedc47ecc31b6193ca48793c249c.tar.bz2
BBVectorize: Use target costs for incoming and outgoing values instead of the depth heuristic.
When target cost information is available, compute explicit costs of inserting and extracting values from vectors. At this point, all costs are estimated using the target information, and the chain-depth heuristic is not needed. As a result, it is now, by default, disabled when using target costs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167256 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms')
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp200
1 files changed, 191 insertions, 9 deletions
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 6e36ff7..4653a7d 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -58,6 +58,11 @@ static cl::opt<unsigned>
ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
cl::desc("The required chain depth for vectorization"));
+static cl::opt<bool>
+UseChainDepthWithTI("bb-vectorize-use-chain-depth", cl::init(false),
+ cl::Hidden, cl::desc("Use the chain depth requirement with"
+ " target information"));
+
static cl::opt<unsigned>
SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
cl::desc("The maximum search distance for instruction pairs"));
@@ -227,6 +232,9 @@ namespace {
DenseMap<ValuePair, int> &CandidatePairCostSavings,
std::vector<Value *> &PairableInsts, bool NonPow2Len);
+ // FIXME: The current implementation does not account for pairs that
+ // are connected in multiple ways. For example:
+ // C1 = A1 / A2; C2 = A2 / A1 (which may be both direct and a swap)
enum PairConnectionType {
PairConnectionDirect,
PairConnectionSwap,
@@ -1665,20 +1673,39 @@ namespace {
int EffSize = 0;
if (VTTI) {
+ DenseSet<Value *> PrunedTreeInstrs;
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S) {
+ PrunedTreeInstrs.insert(S->first);
+ PrunedTreeInstrs.insert(S->second);
+ }
+
+ // The set of pairs that have already contributed to the total cost.
+ DenseSet<ValuePair> IncomingPairs;
+
// The node weights represent the cost savings associated with
// fusing the pair of instructions.
for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
E = PrunedTree.end(); S != E; ++S) {
- if (getDepthFactor(S->first))
- EffSize += CandidatePairCostSavings.find(*S)->second;
+ bool FlipOrder = false;
+
+ if (getDepthFactor(S->first)) {
+ int ESContrib = CandidatePairCostSavings.find(*S)->second;
+ DEBUG(if (DebugPairSelection) dbgs() << "\tweight {"
+ << *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize += ESContrib;
+ }
- // The edge weights contribute in a negative sense: they represent
- // the cost of shuffles.
+ // The edge weights contribute in a negative sense: they represent
+ // the cost of shuffles.
VPPIteratorPair IP = ConnectedPairDeps.equal_range(*S);
if (IP.first != ConnectedPairDeps.end()) {
unsigned NumDepsDirect = 0, NumDepsSwap = 0;
for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
Q != IP.second; ++Q) {
+ if (!PrunedTree.count(Q->second))
+ continue;
DenseMap<VPPair, unsigned>::iterator R =
PairConnectionTypes.find(VPPair(Q->second, Q->first));
assert(R != PairConnectionTypes.end() &&
@@ -1692,12 +1719,14 @@ namespace {
// If there are more swaps than direct connections, then
// the pair order will be flipped during fusion. So the real
// number of swaps is the minimum number.
- bool FlipOrder = !FixedOrderPairs.count(*S) &&
+ FlipOrder = !FixedOrderPairs.count(*S) &&
((NumDepsSwap > NumDepsDirect) ||
FixedOrderPairs.count(ValuePair(S->second, S->first)));
for (std::multimap<ValuePair, ValuePair>::iterator Q = IP.first;
Q != IP.second; ++Q) {
+ if (!PrunedTree.count(Q->second))
+ continue;
DenseMap<VPPair, unsigned>::iterator R =
PairConnectionTypes.find(VPPair(Q->second, Q->first));
assert(R != PairConnectionTypes.end() &&
@@ -1707,9 +1736,161 @@ namespace {
Type *VTy = getVecTypeForPair(Ty1, Ty2);
if ((R->second == PairConnectionDirect && FlipOrder) ||
(R->second == PairConnectionSwap && !FlipOrder) ||
- R->second == PairConnectionSplat)
- EffSize -= (int) getInstrCost(Instruction::ShuffleVector,
- VTy, VTy);
+ R->second == PairConnectionSplat) {
+ int ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *Q->second.first << " <-> " << *Q->second.second <<
+ "} -> {" <<
+ *S->first << " <-> " << *S->second << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+ }
+
+ // Compute the cost of outgoing edges. We assume that edges outgoing
+ // to shuffles, inserts or extracts can be merged, and so contribute
+ // no additional cost.
+ if (!S->first->getType()->isVoidTy()) {
+ Type *Ty1 = S->first->getType(),
+ *Ty2 = S->second->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ bool NeedsExtraction = false;
+ for (Value::use_iterator I = S->first->use_begin(),
+ IE = S->first->use_end(); I != IE; ++I) {
+ if (isa<ShuffleVectorInst>(*I) ||
+ isa<InsertElementInst>(*I) ||
+ isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedTreeInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty1->isVectorTy())
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty1, VTy);
+ else
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 0);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->first << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+
+ NeedsExtraction = false;
+ for (Value::use_iterator I = S->second->use_begin(),
+ IE = S->second->use_end(); I != IE; ++I) {
+ if (isa<ShuffleVectorInst>(*I) ||
+ isa<InsertElementInst>(*I) ||
+ isa<ExtractElementInst>(*I))
+ continue;
+ if (PrunedTreeInstrs.count(*I))
+ continue;
+ NeedsExtraction = true;
+ break;
+ }
+
+ if (NeedsExtraction) {
+ int ESContrib;
+ if (Ty2->isVectorTy())
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ Ty2, VTy);
+ else
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::ExtractElement, VTy, 1);
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {" <<
+ *S->second << "} = " << ESContrib << "\n");
+ EffSize -= ESContrib;
+ }
+ }
+
+ // Compute the cost of incoming edges.
+ if (!isa<LoadInst>(S->first) && !isa<StoreInst>(S->first)) {
+ Instruction *S1 = cast<Instruction>(S->first),
+ *S2 = cast<Instruction>(S->second);
+ for (unsigned o = 0; o < S1->getNumOperands(); ++o) {
+ Value *O1 = S1->getOperand(o), *O2 = S2->getOperand(o);
+
+ // Combining constants into vector constants (or small vector
+ // constants into larger ones are assumed free).
+ if (isa<Constant>(O1) && isa<Constant>(O2))
+ continue;
+
+ if (FlipOrder)
+ std::swap(O1, O2);
+
+ ValuePair VP = ValuePair(O1, O2);
+ ValuePair VPR = ValuePair(O2, O1);
+
+ // Internal edges are not handled here.
+ if (PrunedTree.count(VP) || PrunedTree.count(VPR))
+ continue;
+
+ Type *Ty1 = O1->getType(),
+ *Ty2 = O2->getType();
+ Type *VTy = getVecTypeForPair(Ty1, Ty2);
+
+ // Combining vector operations of the same type is also assumed
+ // folded with other operations.
+ if (Ty1 == Ty2 &&
+ (isa<ShuffleVectorInst>(O1) ||
+ isa<InsertElementInst>(O1) ||
+ isa<InsertElementInst>(O1)) &&
+ (isa<ShuffleVectorInst>(O2) ||
+ isa<InsertElementInst>(O2) ||
+ isa<InsertElementInst>(O2)))
+ continue;
+
+ int ESContrib;
+ // This pair has already been formed.
+ if (IncomingPairs.count(VP)) {
+ continue;
+ } else if (IncomingPairs.count(VPR)) {
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, VTy);
+ } else if (!Ty1->isVectorTy() && !Ty2->isVectorTy()) {
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 0);
+ ESContrib += (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, VTy, 1);
+ } else if (!Ty1->isVectorTy()) {
+ // O1 needs to be inserted into a vector of size O2, and then
+ // both need to be shuffled together.
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty2, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty2);
+ } else if (!Ty2->isVectorTy()) {
+ // O2 needs to be inserted into a vector of size O1, and then
+ // both need to be shuffled together.
+ ESContrib = (int) VTTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty1, 0);
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, Ty1);
+ } else {
+ Type *TyBig = Ty1, *TySmall = Ty2;
+ if (Ty2->getVectorNumElements() > Ty1->getVectorNumElements())
+ std::swap(TyBig, TySmall);
+
+ ESContrib = (int) getInstrCost(Instruction::ShuffleVector,
+ VTy, TyBig);
+ if (TyBig != TySmall)
+ ESContrib += (int) getInstrCost(Instruction::ShuffleVector,
+ TyBig, TySmall);
+ }
+
+ DEBUG(if (DebugPairSelection) dbgs() << "\tcost {"
+ << *O1 << " <-> " << *O2 << "} = " <<
+ ESContrib << "\n");
+ EffSize -= ESContrib;
+ IncomingPairs.insert(VP);
}
}
}
@@ -1724,7 +1905,8 @@ namespace {
<< *J->first << " <-> " << *J->second << "} of depth " <<
MaxDepth << " and size " << PrunedTree.size() <<
" (effective size: " << EffSize << ")\n");
- if (MaxDepth >= Config.ReqChainDepth &&
+ if (((VTTI && !UseChainDepthWithTI) ||
+ MaxDepth >= Config.ReqChainDepth) &&
EffSize > 0 && EffSize > BestEffSize) {
BestMaxDepth = MaxDepth;
BestEffSize = EffSize;