aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Transforms/Vectorize/SLPVectorizer.cpp
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2013-04-09 19:44:35 +0000
committerNadav Rotem <nrotem@apple.com>2013-04-09 19:44:35 +0000
commit8383b539ff4c039108ee0c202a27b787621d96cf (patch)
treea94c718adf657b35e9c1581987a588bac83242f1 /lib/Transforms/Vectorize/SLPVectorizer.cpp
parent376e05fd7ba37b76ea26fa7604671c9abd32307e (diff)
downloadexternal_llvm-8383b539ff4c039108ee0c202a27b787621d96cf.zip
external_llvm-8383b539ff4c039108ee0c202a27b787621d96cf.tar.gz
external_llvm-8383b539ff4c039108ee0c202a27b787621d96cf.tar.bz2
Add support for bottom-up SLP vectorization infrastructure.
This commit adds the infrastructure for performing bottom-up SLP vectorization (and other optimizations) on parallel computations. The infrastructure has three potential users: 1. The loop vectorizer needs to be able to vectorize AOS data structures such as (sum += A[i] + A[i+1]). 2. The BB-vectorizer needs this infrastructure for bottom-up SLP vectorization, because bottom-up vectorization is faster to compute. 3. A loop-roller needs to be able to analyze consecutive chains and roll them into a loop, in order to reduce code size. A loop roller does not need to create vector instructions, and this infrastructure separates the chain analysis from the vectorization. This patch also includes a simple (100 LOC) bottom up SLP vectorizer that uses the infrastructure, and can vectorize this code: void SAXPY(int *x, int *y, int a, int i) { x[i] = a * x[i] + y[i]; x[i+1] = a * x[i+1] + y[i+1]; x[i+2] = a * x[i+2] + y[i+2]; x[i+3] = a * x[i+3] + y[i+3]; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179117 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Transforms/Vectorize/SLPVectorizer.cpp')
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp153
1 files changed, 153 insertions, 0 deletions
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
new file mode 100644
index 0000000..4b61dc9
--- /dev/null
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -0,0 +1,153 @@
+//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
+// stores that can be put together into vector-stores. Next, it attempts to
+// construct vectorizable tree using the use-def chains. If a profitable tree
+// was found, the SLP vectorizer performs vectorization on the tree.
+//
+// The pass is inspired by the work described in the paper:
+// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
+//
+//===----------------------------------------------------------------------===//
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE SV_NAME
+
+#include "VecUtils.h"
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<int>
+SLPCostThreshold("slp-threshold", cl::init(1), cl::Hidden,
+ cl::desc("Only vectorize trees if the gain is above this "
+ "number. (gain = -cost of vectorization)"));
+namespace {
+
+/// The SLPVectorizer Pass.
+struct SLPVectorizer : public BasicBlockPass {
+ typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+
+ /// Pass identification, replacement for typeid
+ static char ID;
+
+ explicit SLPVectorizer() : BasicBlockPass(ID) {
+ initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ TargetTransformInfo *TTI;
+ AliasAnalysis *AA;
+
+ /// \brief Collect memory references and sort them according to their base
+ /// object. We sort the stores to their base objects to reduce the cost of the
+ /// quadratic search on the stores. TODO: We can further reduce this cost
+ /// if we flush the chain creation every time we run into a memory barrier.
+ bool CollectStores(BasicBlock *BB, BoUpSLP &R) {
+ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+ // Can't vectorize instructions with side effects.
+ if (it->mayThrow())
+ return false;
+
+ StoreInst *SI = dyn_cast<StoreInst>(it);
+ if (!SI)
+ continue;
+
+ // Check that the pointer points to scalars.
+ if (SI->getValueOperand()->getType()->isAggregateType())
+ return false;
+
+ // Find the base of the GEP.
+ Value *Ptr = SI->getPointerOperand();
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ Ptr = GEP->getPointerOperand();
+
+ // Save the store locations.
+ StoreRefs[Ptr].push_back(SI);
+ }
+ return true;
+ }
+
+ bool RollStoreChains(BoUpSLP &R) {
+ bool Changed = false;
+ // Attempt to sort and vectorize each of the store-groups.
+ for (StoreListMap::iterator it = StoreRefs.begin(), e = StoreRefs.end();
+ it != e; ++it) {
+ if (it->second.size() < 2)
+ continue;
+ Changed |= R.vectorizeStores(it->second, -SLPCostThreshold);
+ }
+ return Changed;
+ }
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ TTI = &getAnalysis<TargetTransformInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+ StoreRefs.clear();
+
+ // Use the bollom up slp vectorizer to construct chains that start with
+ // he store instructions.
+ BoUpSLP R(&BB, SE, DL, TTI, AA);
+
+ if (!CollectStores(&BB, R))
+ return false;
+
+ bool Changed = RollStoreChains(R);
+ if (Changed) {
+ DEBUG(dbgs()<<"Rolled chains in \""<<BB.getParent()->getName()<<"\"\n");
+ DEBUG(verifyFunction(*BB.getParent()));
+ }
+
+ return Changed;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<ScalarEvolution>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+
+private:
+ StoreListMap StoreRefs;
+};
+
+} // end anonymous namespace
+
+char SLPVectorizer::ID = 0;
+static const char lv_name[] = "SLP Vectorizer";
+INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createSLPVectorizerPass() {
+ return new SLPVectorizer();
+ }
+}
+