aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h6
-rw-r--r--include/llvm/InitializePasses.h3
-rw-r--r--include/llvm/Transforms/Scalar.h2
-rw-r--r--include/llvm/Transforms/Utils/Local.h3
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp6
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp3
-rw-r--r--lib/Target/R600/AMDGPU.h4
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp12
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h3
-rw-r--r--lib/Target/R600/AMDGPUTargetTransformInfo.cpp90
-rw-r--r--lib/Target/R600/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp4
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp3
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp81
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp443
-rw-r--r--tools/lto/LTOCodeGenerator.cpp2
-rw-r--r--tools/opt/opt.cpp3
17 files changed, 629 insertions, 40 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index b8a44b5..21a3a12 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -171,6 +171,12 @@ public:
/// comments for a detailed explanation of the cost values.
virtual unsigned getUserCost(const User *U) const;
+ /// \brief hasBranchDivergence - Return true if branch divergence exists.
+ /// Branch divergence has a significantly negative impact on GPU performance
+ /// when threads in the same wavefront take different paths due to conditional
+ /// branches.
+ virtual bool hasBranchDivergence() const;
+
/// \brief Test whether calls to a function lower to actual program function
/// calls.
///
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 86fd851..d49636d 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -86,7 +86,8 @@ void initializeCallGraphViewerPass(PassRegistry&);
void initializeCFGOnlyPrinterPass(PassRegistry&);
void initializeCFGOnlyViewerPass(PassRegistry&);
void initializeCFGPrinterPass(PassRegistry&);
-void initializeCFGSimplifyPassPass(PassRegistry&);
+void initializeCFGOptimizePass(PassRegistry&);
+void initializeCFGCanonicalizePass(PassRegistry&);
void initializeStructurizeCFGPass(PassRegistry&);
void initializeCFGViewerPass(PassRegistry&);
void initializeCalculateSpillWeightsPass(PassRegistry&);
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index a327dff..b52c327 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -196,7 +196,7 @@ FunctionPass *createJumpThreadingPass();
// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
// simplify terminator instructions, etc...
//
-FunctionPass *createCFGSimplificationPass();
+FunctionPass *createCFGSimplificationPass(bool IsTargetAware = false);
//===----------------------------------------------------------------------===//
//
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 2678250..ec2ab2a 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -39,6 +39,7 @@ class DataLayout;
class TargetLibraryInfo;
class TargetTransformInfo;
class DIBuilder;
+class AliasAnalysis;
template<typename T> class SmallVectorImpl;
@@ -136,7 +137,7 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// the basic block that was pointed to.
///
bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const DataLayout *TD = 0);
+ const DataLayout *TD = 0, AliasAnalysis *AA = 0);
/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
/// and if a predecessor branches to us and one of our successors, fold the
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4b2bf3c..4ad7162 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -88,6 +88,10 @@ unsigned TargetTransformInfo::getUserCost(const User *U) const {
return PrevTTI->getUserCost(U);
}
+bool TargetTransformInfo::hasBranchDivergence() const {
+ return PrevTTI->hasBranchDivergence();
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
@@ -420,6 +424,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
U->getOperand(0)->getType() : 0);
}
+ bool hasBranchDivergence() const { return false; }
+
bool isLoweredToCall(const Function *F) const {
// FIXME: These should almost certainly not be handled here, and instead
// handled with the help of TLI or the target itself. This was largely
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 3755320..cb12a40 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -65,6 +65,8 @@ public:
return this;
}
+ virtual bool hasBranchDivergence() const;
+
/// \name Scalar TTI Implementations
/// @{
@@ -124,6 +126,7 @@ llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
return new BasicTTI(TM);
}
+bool BasicTTI::hasBranchDivergence() const { return false; }
bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
return getTLI()->isLegalAddImmediate(imm);
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index fbf1fce..51d0d3c 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -45,6 +45,10 @@ FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
FunctionPass *createAMDGPUIndirectAddressingPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
+ImmutablePass *
+createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
+
extern Target TheAMDGPUTarget;
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 1dc1b6b..33e2dae 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -105,6 +105,18 @@ TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
return new AMDGPUPassConfig(this, PM);
}
+//===----------------------------------------------------------------------===//
+// AMDGPU Analysis Pass Setup
+//===----------------------------------------------------------------------===//
+
+void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our AMDGPU pass. This
+ // allows the AMDGPU pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createAMDGPUTargetTransformInfoPass(this));
+}
+
bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 26e95d3..f942614 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -61,6 +61,9 @@ public:
}
virtual const DataLayout *getDataLayout() const { return &Layout; }
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ /// \brief Register R600 analysis passes with a pass manager.
+ virtual void addAnalysisPasses(PassManagerBase &PM);
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
new file mode 100644
index 0000000..8db319c
--- /dev/null
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -0,0 +1,90 @@
+//===-- AMDGPUTargetTransformInfo.cpp - AMDGPU specific TTI pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// AMDGPU target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "AMDGPUtti"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/CostTable.h"
+using namespace llvm;
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializeAMDGPUTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class AMDGPUTTI : public ImmutablePass, public TargetTransformInfo {
+ const AMDGPUTargetMachine *TM;
+ const AMDGPUSubtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
+public:
+ AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ AMDGPUTTI(const AMDGPUTargetMachine *TM)
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() { pushTTIStack(this); }
+
+ virtual void finalizePass() { popTTIStack(); }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo *)this;
+ return this;
+ }
+
+ virtual bool hasBranchDivergence() const;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(AMDGPUTTI, TargetTransformInfo, "AMDGPUtti",
+ "AMDGPU Target Transform Info", true, true, false)
+char AMDGPUTTI::ID = 0;
+
+ImmutablePass *
+llvm::createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM) {
+ return new AMDGPUTTI(TM);
+}
+
+bool AMDGPUTTI::hasBranchDivergence() const { return true; }
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 4f8665b..40d255a 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_target(R600CodeGen
AMDGPUMachineFunction.cpp
AMDGPUSubtarget.cpp
AMDGPUTargetMachine.cpp
+ AMDGPUTargetTransformInfo.cpp
AMDGPUISelLowering.cpp
AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 1917cc8..6355204 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -235,7 +235,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
}
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
- MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ MPM.add(createCFGSimplificationPass(true)); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
// As an experimental mode, run any vectorization passes in a separate
@@ -371,7 +371,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createJumpThreadingPass());
// Delete basic blocks, which optimization passes may have killed.
- PM.add(createCFGSimplificationPass());
+ PM.add(createCFGSimplificationPass(true));
// Now that we have optimized the program, discard unreachable functions.
PM.add(createGlobalDCEPass());
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 758334d..18cdcfe 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -57,7 +57,8 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSROAPass(Registry);
initializeSROA_DTPass(Registry);
initializeSROA_SSAUpPass(Registry);
- initializeCFGSimplifyPassPass(Registry);
+ initializeCFGCanonicalizePass(Registry);
+ initializeCFGOptimizePass(Registry);
initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index c243d34..9ab5f45 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -27,6 +27,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -41,30 +42,62 @@ using namespace llvm;
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
- struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(ID) {
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnFunction(Function &F);
+struct CFGSimplifyPass : public FunctionPass {
+ CFGSimplifyPass(char &ID, bool isTargetAware)
+ : FunctionPass(ID), IsTargetAware(isTargetAware) {}
+ virtual bool runOnFunction(Function &F);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfo>();
- }
- };
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+ }
+private:
+ AliasAnalysis *AA;
+ bool IsTargetAware; // Should the pass be target-aware?
+};
+
+// CFGSimplifyPass that does optimizations.
+struct CFGOptimize : public CFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+public:
+ CFGOptimize() : CFGSimplifyPass(ID, true) {
+ initializeCFGOptimizePass(*PassRegistry::getPassRegistry());
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfo>();
+ AU.addRequired<AliasAnalysis>();
+ }
+};
+
+// CFGSimplifyPass that does canonicalizations.
+struct CFGCanonicalize : public CFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+public:
+ CFGCanonicalize() : CFGSimplifyPass(ID, false) {
+ initializeCFGCanonicalizePass(*PassRegistry::getPassRegistry());
+ }
+};
}
-char CFGSimplifyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
- false, false)
+char CFGCanonicalize::ID = 0;
+char CFGOptimize::ID = 0;
+INITIALIZE_PASS_BEGIN(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_END(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false,
+ false)
+INITIALIZE_PASS_BEGIN(CFGOptimize, "optimizecfg", "optimize the CFG", false,
+ false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
-INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG",
- false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(CFGOptimize, "optimizecfg", "Optimize the CFG", false,
+ false)
// Public interface to the CFGSimplification pass
-FunctionPass *llvm::createCFGSimplificationPass() {
- return new CFGSimplifyPass();
+FunctionPass *llvm::createCFGSimplificationPass(bool IsTargetAware) {
+ if (IsTargetAware)
+ return new CFGOptimize();
+ else
+ return new CFGCanonicalize();
}
/// changeToUnreachable - Insert an unreachable instruction before the specified
@@ -301,7 +334,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function,
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
+ const DataLayout *TD, AliasAnalysis *AA) {
bool Changed = false;
bool LocalChange = true;
while (LocalChange) {
@@ -310,7 +343,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded...
//
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(BBIt++, TTI, TD)) {
+ if (SimplifyCFG(BBIt++, TTI, TD, AA)) {
LocalChange = true;
++NumSimpl;
}
@@ -324,11 +357,15 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// simplify the CFG.
//
bool CFGSimplifyPass::runOnFunction(Function &F) {
+ if (IsTargetAware)
+ AA = &getAnalysis<AliasAnalysis>();
+ else
+ AA = NULL;
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
bool EverChanged = removeUnreachableBlocksFromFn(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, TD, AA);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -342,7 +379,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, TD);
+ EverChanged = iterativelySimplifyCFG(F, TTI, TD, AA);
EverChanged |= removeUnreachableBlocksFromFn(F);
} while (EverChanged);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index eaeb19f..084b74c 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -65,6 +66,10 @@ static cl::opt<bool>
HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
cl::desc("Hoist conditional stores if an unconditional store preceeds"));
+static cl::opt<bool>
+ParallelAndOr("simplifycfg-parallel-and-or", cl::Hidden, cl::init(true),
+ cl::desc("Use parallel-and-or mode for branch conditions"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
@@ -90,6 +95,7 @@ namespace {
class SimplifyCFGOpt {
const TargetTransformInfo &TTI;
const DataLayout *const TD;
+ AliasAnalysis *AA;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI,
@@ -107,10 +113,25 @@ class SimplifyCFGOpt {
bool SimplifyIndirectBr(IndirectBrInst *IBI);
bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder);
bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder);
+ /// \brief Use parallel-and or parallel-or to generate conditions for
+ /// conditional branches.
+ bool SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief If \param BB is the merge block of an if-region, attempt to merge
+ /// the if-region with an adjacent if-region upstream if two if-regions
+ /// contain identical instructions.
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ /// \brief Compare a pair of blocks: \param Block1 and \param Block2, which
+ /// are from two if-regions whose entry blocks are \param Head1 and \param
+ /// Head2. \returns true if \param Block1 and \param Block2 contain identical
+ /// instructions, and have no memory reference alias with \param Head2.
+ /// This is used as a legality check for merging if-regions.
+ bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2);
public:
- SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD)
- : TTI(TTI), TD(TD) {}
+ SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD,
+ AliasAnalysis *AA)
+ : TTI(TTI), TD(TD), AA(AA) {}
bool run(BasicBlock *BB);
};
}
@@ -197,8 +218,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
}
-/// GetIfCondition - Given a basic block (BB) with two predecessors (and at
-/// least one PHI node in it), check to see if the merge at this block is due
+/// GetIfCondition - Given a basic block (BB) with two predecessors,
+/// check to see if the merge at this block is due
/// to an "if condition". If so, return the boolean condition that determines
/// which entry into BB will be taken. Also, return by references the block
/// that will be entered from if the condition is true, and the block that will
@@ -208,11 +229,26 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// instructions in them.
static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
- PHINode *SomePHI = cast<PHINode>(BB->begin());
- assert(SomePHI->getNumIncomingValues() == 2 &&
- "Function can only handle blocks with 2 predecessors!");
- BasicBlock *Pred1 = SomePHI->getIncomingBlock(0);
- BasicBlock *Pred2 = SomePHI->getIncomingBlock(1);
+ PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
+ BasicBlock *Pred1 = NULL;
+ BasicBlock *Pred2 = NULL;
+
+ if (SomePHI) {
+ if (SomePHI->getNumIncomingValues() != 2)
+ return NULL;
+ Pred1 = SomePHI->getIncomingBlock(0);
+ Pred2 = SomePHI->getIncomingBlock(1);
+ } else {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE) // No predecessor
+ return NULL;
+ Pred1 = *PI++;
+ if (PI == PE) // Only one predecessor
+ return NULL;
+ Pred2 = *PI++;
+ if (PI != PE) // More than two predecessors
+ return NULL;
+ }
// We can only handle branches. Other control flow will be lowered to
// branches if possible anyway.
@@ -4066,6 +4102,383 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return false;
}
+/// If \param [in] BB has more than one predecessor that is a conditional
+/// branch, attempt to use parallel and/or for the branch condition. \returns
+/// true on success.
+///
+/// Before:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// br i1 %cmp1, label %if.then, label %lor.rhs
+///
+/// lor.rhs:
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// br i1 %cmp11, label %if.then, label %ifend
+///
+/// if.end: // the merge block
+/// ......
+///
+/// if.then: // has two predecessors, both of them contains conditional branch.
+/// ......
+/// br label %if.end;
+///
+/// After:
+/// ......
+/// %cmp10 = fcmp une float %tmp1, %tmp2
+/// ......
+/// %cmp11 = fcmp une float %tmp3, %tmp4
+/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode.
+/// br i1 %cmp12, label %if.then, label %ifend
+///
+/// if.end:
+/// ......
+///
+/// if.then:
+/// ......
+/// br label %if.end;
+///
+/// Current implementation handles two cases.
+/// Case 1: \param BB is on the else-path.
+///
+/// BB1
+/// / |
+/// BB2 |
+/// / \ |
+/// BB3 \ | where, BB1, BB2 contain conditional branches.
+/// \ | / BB3 contains unconditional branch.
+/// \ | / BB4 corresponds to \param BB which is also the merge.
+/// BB => BB4
+///
+///
+/// Corresponding source code:
+///
+/// if (a == b && c == d)
+/// statement; // BB3
+///
+/// Case 2: \param BB BB is on the then-path.
+///
+/// BB1
+/// / |
+/// | BB2
+/// \ / | where BB1, BB2 contain conditional branches.
+/// BB => BB3 | BB3 contains unconditiona branch and corresponds
+/// \ / to \param BB. BB4 is the merge.
+/// BB4
+///
+/// Corresponding source code:
+///
+/// if (a == b || c == d)
+/// statement; // BB3
+///
+/// In both cases, \param BB is the common successor of conditional branches.
+/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
+/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches
+/// as its predecessors.
+///
+bool SimplifyCFGOpt::SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ PHINode *PHI = dyn_cast<PHINode>(BB->begin());
+ if (PHI)
+ return false; // For simplicity, avoid cases containing PHI nodes.
+
+ BasicBlock *LastCondBlock = NULL;
+ BasicBlock *FirstCondBlock = NULL;
+ BasicBlock *UnCondBlock = NULL;
+ int Idx = -1;
+
+ // Check predecessors of \param BB.
+ SmallPtrSet<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));
+ for (SmallPtrSetIterator<BasicBlock*> PI = Preds.begin(), PE = Preds.end();
+ PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());
+
+ // All predecessors should terminate with a branch.
+ if (!PBI)
+ return false;
+
+ BasicBlock *PP = Pred->getSinglePredecessor();
+
+ if (PBI->isUnconditional()) {
+ // Case 1: Pred (BB3) is an unconditional block, it should
+ // have a single predecessor (BB2) that is also a predecessor
+ // of \param BB (BB4) and should not have address-taken.
+ // There should exist only one such unconditional
+ // branch among the predecessors.
+ if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ Pred->hasAddressTaken())
+ return false;
+
+ UnCondBlock = Pred;
+ continue;
+ }
+
+ // Only conditional branches are allowed beyond this point.
+ assert(PBI->isConditional());
+
+ // Condition's unique use should be the branch instruction.
+ Value *PC = PBI->getCondition();
+ if (!PC || !PC->hasOneUse())
+ return false;
+
+ if (PP && Preds.count(PP)) {
+ // These are internal condition blocks to be merged from, e.g.,
+ // BB2 in both cases.
+ // Should not be address-taken.
+ if (Pred->hasAddressTaken())
+ return false;
+
+ // Instructions in the internal condition blocks should be safe
+ // to hoist up.
+ for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
+ Instruction *CI = BI++;
+ if (isa<PHINode>(CI) ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+ } else {
+ // This is the condition block to be merged into, e.g. BB1 in
+ // both cases.
+ if (FirstCondBlock)
+ return false;
+ FirstCondBlock = Pred;
+ }
+
+ // Find whether BB is uniformly on the true (or false) path
+ // for all of its predecessors.
+ BasicBlock *PS1 = PBI->getSuccessor(0);
+ BasicBlock *PS2 = PBI->getSuccessor(1);
+ BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
+ int CIdx = (PS1 == BB) ? 0 : 1;
+
+ if (Idx == -1)
+ Idx = CIdx;
+ else if (CIdx != Idx)
+ return false;
+
+ // PS is the successor which is not BB. Check successors to identify
+ // the last conditional branch.
+ if (Preds.count(PS) == 0) {
+ // Case 2.
+ // BB must have an unique successor.
+ TerminatorInst *TBB = BB->getTerminator();
+ if (TBB->getNumSuccessors() != 1)
+ return false;
+
+ BasicBlock *SBB = TBB->getSuccessor(0);
+ PHI = dyn_cast<PHINode>(SBB->begin());
+ if (PHI)
+ return false;
+
+ // PS (BB4) should be BB's successor.
+ if (SBB != PS)
+ return false;
+ LastCondBlock = Pred;
+ } else {
+ BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
+ if (BPS && BPS->isUnconditional()) {
+ // Case 1: PS(BB3) should be an unconditional branch.
+ LastCondBlock = Pred;
+ }
+ }
+ }
+
+ if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
+ return false;
+
+ // Do the transformation.
+ BasicBlock *CB;
+ bool Iteration = true;
+ BasicBlock::iterator ItOld = Builder.GetInsertPoint();
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *PC = PBI->getCondition();
+ do {
+ CB = PBI->getSuccessor(1 - Idx);
+ // Delete the conditional branch.
+ FirstCondBlock->getInstList().pop_back();
+ FirstCondBlock->getInstList().splice(FirstCondBlock->end(), CB->getInstList());
+ PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ // Merge conditions.
+ Builder.SetInsertPoint(PBI);
+ Value *NC;
+ if (Idx == 0)
+ // Case 2, use parallel or.
+ NC = Builder.CreateOr(PC, CC);
+ else
+ // Case 1, use parallel and.
+ NC = Builder.CreateAnd(PC, CC);
+
+ PBI->replaceUsesOfWith(CC, NC);
+ PC = NC;
+ if (CB == LastCondBlock)
+ Iteration = false;
+ // Remove internal conditional branches.
+ CB->dropAllReferences();
+ // make CB unreachable and let downstream to delete the block.
+ new UnreachableInst(CB->getContext(), CB);
+ } while (Iteration);
+
+ Builder.SetInsertPoint(ItOld);
+ DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
+ return true;
+}
+
+/// Compare blocks from two if-regions, where \param Head1 is the entry of the
+/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param
+/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block
+// in the 2nd if-region to compare. \returns true if \param Block1 and \param
+/// Block2 have identical instructions and do not have memory reference alias
+/// with \param Head2.
+///
+bool SimplifyCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2,
+ BasicBlock *Block1, BasicBlock *Block2) {
+ TerminatorInst *PTI2 = Head2->getTerminator();
+ Instruction *PBI2 = Head2->begin();
+
+ bool eq1 = (Block1 == Head1);
+ bool eq2 = (Block2 == Head2);
+ if (eq1 || eq2) {
+ // An empty then-path or else-path.
+ return (eq1 == eq2);
+ }
+
+ // Check whether instructions in Block1 and Block2 are identical
+ // and do not alias with instructions in Head2.
+ BasicBlock::iterator iter1 = Block1->begin();
+ BasicBlock::iterator end1 = Block1->getTerminator();
+ BasicBlock::iterator iter2 = Block2->begin();
+ BasicBlock::iterator end2 = Block2->getTerminator();
+
+ while (1) {
+ if (iter1 == end1) {
+ if (iter2 != end2)
+ return false;
+ break;
+ }
+
+ if (!iter1->isIdenticalTo(iter2))
+ return false;
+
+ // Illegal to remove instructions with side effects except
+ // non-volatile stores.
+ if (iter1->mayHaveSideEffects()) {
+ Instruction *CurI = &*iter1;
+ StoreInst *SI = dyn_cast<StoreInst>(CurI);
+ if (!SI || SI->isVolatile())
+ return false;
+ }
+
+ // For simplicity and speed, data dependency check can be
+ // avoided if read from memory doesn't exist.
+ if (iter1->mayReadFromMemory())
+ return false;
+
+ if (iter1->mayWriteToMemory()) {
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) {
+ // Check alias with Head2.
+ if (!AA || AA->alias(iter1, BI))
+ return false;
+ }
+ }
+ }
+ ++iter1;
+ ++iter2;
+ }
+
+ return true;
+}
+
+/// Check whether \param BB is the merge block of a if-region. If yes, check
+/// whether there exists an adjacent if-region upstream, the two if-regions
+/// contain identical instuctions and can be legally merged. \returns true if
+/// the two if-regions are merged.
+///
+/// From:
+/// if (a)
+/// statement;
+/// if (b)
+/// statement;
+///
+/// To:
+/// if (a || b)
+/// statement;
+///
+bool SimplifyCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P) {
+ BasicBlock *IfTrue2, *IfFalse2;
+ Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2);
+ Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2);
+ if (!CInst2)
+ return false;
+
+ BasicBlock *SecondEntryBlock = CInst2->getParent();
+ if (SecondEntryBlock->hasAddressTaken())
+ return false;
+
+ BasicBlock *IfTrue1, *IfFalse1;
+ Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1);
+ Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1);
+ if (!CInst1)
+ return false;
+
+ BasicBlock *FirstEntryBlock = CInst1->getParent();
+
+ // Either then-path or else-path should be empty.
+ if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock))
+ return false;
+ if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock))
+ return false;
+
+ TerminatorInst *PTI2 = SecondEntryBlock->getTerminator();
+ Instruction *PBI2 = SecondEntryBlock->begin();
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2))
+ return false;
+
+ if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, IfFalse2))
+ return false;
+
+ // Check whether \param SecondEntryBlock has side-effect and is safe to speculate.
+ for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) {
+ Instruction *CI = BI;
+ if (isa<PHINode>(CI) || CI->mayHaveSideEffects() ||
+ !isSafeToSpeculativelyExecute(CI))
+ return false;
+ }
+
+ // Merge \param SecondEntryBlock into \param FirstEntryBlock.
+ FirstEntryBlock->getInstList().pop_back();
+ FirstEntryBlock->getInstList().splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList());
+ BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator());
+ Value *CC = PBI->getCondition();
+ BasicBlock::iterator ItOld = Builder.GetInsertPoint();
+ Builder.SetInsertPoint(PBI);
+ Value *NC = Builder.CreateOr(CInst1, CC);
+ PBI->replaceUsesOfWith(CC, NC);
+ Builder.SetInsertPoint(ItOld);
+
+ // Remove IfTrue1
+ if (IfTrue1 != FirstEntryBlock) {
+ IfTrue1->dropAllReferences();
+ IfTrue1->eraseFromParent();
+ }
+
+ // Remove IfFalse1
+ if (IfFalse1 != FirstEntryBlock) {
+ IfFalse1->dropAllReferences();
+ IfFalse1->eraseFromParent();
+ }
+
+ // Remove \param SecondEntryBlock
+ SecondEntryBlock->dropAllReferences();
+ SecondEntryBlock->eraseFromParent();
+ DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);
+ return true;
+}
+
/// Check if passing a value to an instruction will cause undefined behavior.
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
Constant *C = dyn_cast<Constant>(V);
@@ -4168,6 +4581,11 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
return true;
IRBuilder<> Builder(BB);
+ // Whether to optimize conditional branches.
+ bool OptCB = (ParallelAndOr && AA && TTI.hasBranchDivergence());
+
+ if (OptCB && SimplifyParallelAndOr(BB, Builder))
+ return true;
// If there is a trivial two-entry PHI node in this basic block, and we can
// eliminate it, do so now.
@@ -4196,6 +4614,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (SimplifyIndirectBr(IBI)) return true;
}
+ if (OptCB && MergeIfRegion(BB, Builder))
+ return true;
+
return Changed;
}
@@ -4205,6 +4626,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// of the CFG. It returns true if a modification was made.
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
- const DataLayout *TD) {
- return SimplifyCFGOpt(TTI, TD).run(BB);
+ const DataLayout *TD, AliasAnalysis *AA) {
+ return SimplifyCFGOpt(TTI, TD, AA).run(BB);
}
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 6139ade..776a5b9 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -118,7 +118,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeGVNPass(R);
initializeMemCpyOptPass(R);
initializeDCEPass(R);
- initializeCFGSimplifyPassPass(R);
+ initializeCFGCanonicalizePass(R);
}
bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 68fca83..fa0a0ed 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -667,6 +667,9 @@ int main(int argc, char **argv) {
FPasses.reset(new FunctionPassManager(M.get()));
if (TD)
FPasses->add(new DataLayout(*TD));
+ if (TM.get())
+ TM->addAnalysisPasses(*FPasses);
+
}
if (PrintBreakpoints) {