diff options
21 files changed, 832 insertions, 703 deletions
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index d49636d..d7d18d0 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -86,8 +86,8 @@ void initializeCallGraphViewerPass(PassRegistry&); void initializeCFGOnlyPrinterPass(PassRegistry&); void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); -void initializeCFGOptimizePass(PassRegistry&); -void initializeCFGCanonicalizePass(PassRegistry&); +void initializeCFGSimplifyPassPass(PassRegistry&); +void initializeFlattenCFGPassPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); void initializeCFGViewerPass(PassRegistry&); void initializeCalculateSpillWeightsPass(PassRegistry&); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index b52c327..037ab6b 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -196,7 +196,14 @@ FunctionPass *createJumpThreadingPass(); // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, // simplify terminator instructions, etc... // -FunctionPass *createCFGSimplificationPass(bool IsTargetAware = false); +FunctionPass *createCFGSimplificationPass(); + +//===----------------------------------------------------------------------===// +// +// FlattenCFG - flatten CFG, reduce number of conditional branches by using +// parallel-and and parallel-or mode, etc... +// +FunctionPass *createFlattenCFGPass(); //===----------------------------------------------------------------------===// // diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index b5478cb..65cafe2 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -205,6 +205,15 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, TerminatorInst *SplitBlockAndInsertIfThen(Instruction *Cmp, bool Unreachable, MDNode *BranchWeights = 0); +/// +/// GetIfCondition - Check whether BB is the merge point of a if-region. +/// If so, return the boolean condition that determines which entry into +/// BB will be taken. Also, return by references the block that will be +/// entered from if the condition is true, and the block that will be +/// entered if the condition is false. + +Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse); } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 5e60286..65755d0 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -137,7 +137,13 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB); /// the basic block that was pointed to. /// bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const DataLayout *TD = 0, AliasAnalysis *AA = 0); + const DataLayout *TD = 0); + +/// FlatternCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. +/// +bool FlattenCFG(BasicBlock *BB, AliasAnalysis *AA = 0); /// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, /// and if a predecessor branches to us and one of our successors, fold the diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 33e2dae..1a30496 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -91,7 +91,6 @@ public: AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM<AMDGPUTargetMachine>(); } - virtual bool addPreISel(); virtual bool addInstSelector(); virtual bool addPreRegAlloc(); @@ -120,6 +119,7 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { bool AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + addPass(createFlattenCFGPass()); if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { addPass(createStructurizeCFGPass()); addPass(createSIAnnotateControlFlowPass()); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 6355204..a6b3f4e 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -235,7 +235,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { } MPM.add(createAggressiveDCEPass()); // Delete dead instructions - MPM.add(createCFGSimplificationPass(true)); // Merge & remove BBs + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. // As an experimental mode, run any vectorization passes in a separate @@ -371,7 +371,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createJumpThreadingPass()); // Delete basic blocks, which optimization passes may have killed. - PM.add(createCFGSimplificationPass(true)); + PM.add(createCFGSimplificationPass()); // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass()); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 4a35859..f5d1db1 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp + FlattenCFGPass.cpp Sink.cpp StructurizeCFG.cpp TailRecursionElimination.cpp diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp new file mode 100644 index 0000000..e7de07f --- /dev/null +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -0,0 +1,79 @@ +//===- FlattenCFGPass.cpp - CFG Flatten Pass ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements flattening of CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "flattencfg" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +namespace { +struct FlattenCFGPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid +public: + FlattenCFGPass() : FunctionPass(ID) { + initializeFlattenCFGPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AliasAnalysis>(); + } + +private: + AliasAnalysis *AA; +}; +} + +char FlattenCFGPass::ID = 0; +INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, + false) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, + false) + +// Public interface to the FlattenCFG pass +FunctionPass *llvm::createFlattenCFGPass() { return new FlattenCFGPass(); } + +/// iterativelyFlattenCFG - Call FlattenCFG on all the blocks in the function, +/// iterating until no more changes are made. +static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { + bool Changed = false; + bool LocalChange = true; + while (LocalChange) { + LocalChange = false; + + // Loop over all of the basic blocks and remove them if they are unneeded... + // + for (Function::iterator BBIt = F.begin(); BBIt != F.end();) { + if (FlattenCFG(BBIt++, AA)) { + LocalChange = true; + } + } + Changed |= LocalChange; + } + return Changed; +} + +bool FlattenCFGPass::runOnFunction(Function &F) { + AA = &getAnalysis<AliasAnalysis>(); + bool EverChanged = false; + // iterativelyFlattenCFG can make some blocks dead. + while (iterativelyFlattenCFG(F, AA)) { + removeUnreachableBlocks(F); + EverChanged = true; + } + return EverChanged; +} diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 18cdcfe..758334d 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -57,8 +57,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROAPass(Registry); initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); - initializeCFGCanonicalizePass(Registry); - initializeCFGOptimizePass(Registry); + initializeCFGSimplifyPassPass(Registry); initializeStructurizeCFGPass(Registry); initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9ab5f45..6d05640 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -43,61 +42,28 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { struct CFGSimplifyPass : public FunctionPass { - CFGSimplifyPass(char &ID, bool isTargetAware) - : FunctionPass(ID), IsTargetAware(isTargetAware) {} - virtual bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetTransformInfo>(); - } -private: - AliasAnalysis *AA; - bool IsTargetAware; // Should the pass be target-aware? -}; - -// CFGSimplifyPass that does optimizations. -struct CFGOptimize : public CFGSimplifyPass { static char ID; // Pass identification, replacement for typeid -public: - CFGOptimize() : CFGSimplifyPass(ID, true) { - initializeCFGOptimizePass(*PassRegistry::getPassRegistry()); + CFGSimplifyPass() : FunctionPass(ID) { + initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfo>(); - AU.addRequired<AliasAnalysis>(); - } -}; - -// CFGSimplifyPass that does canonicalizations. -struct CFGCanonicalize : public CFGSimplifyPass { - static char ID; // Pass identification, replacement for typeid -public: - CFGCanonicalize() : CFGSimplifyPass(ID, false) { - initializeCFGCanonicalizePass(*PassRegistry::getPassRegistry()); } }; } -char CFGCanonicalize::ID = 0; -char CFGOptimize::ID = 0; -INITIALIZE_PASS_BEGIN(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false, - false) -INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_PASS_END(CFGCanonicalize, "simplifycfg", "Simplify the CFG", false, - false) -INITIALIZE_PASS_BEGIN(CFGOptimize, "optimizecfg", "optimize the CFG", false, +char CFGSimplifyPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(CFGOptimize, "optimizecfg", "Optimize the CFG", false, +INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) // Public interface to the CFGSimplification pass -FunctionPass *llvm::createCFGSimplificationPass(bool IsTargetAware) { - if (IsTargetAware) - return new CFGOptimize(); - else - return new CFGCanonicalize(); +FunctionPass *llvm::createCFGSimplificationPass() { + return new CFGSimplifyPass(); } /// changeToUnreachable - Insert an unreachable instruction before the specified @@ -334,7 +300,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, - const DataLayout *TD, AliasAnalysis *AA) { + const DataLayout *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -343,7 +309,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, TD, AA)) { + if (SimplifyCFG(BBIt++, TTI, TD)) { LocalChange = true; ++NumSimpl; } @@ -357,15 +323,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // simplify the CFG. // bool CFGSimplifyPass::runOnFunction(Function &F) { - if (IsTargetAware) - AA = &getAnalysis<AliasAnalysis>(); - else - AA = NULL; const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, TD, AA); + EverChanged |= iterativelySimplifyCFG(F, TTI, TD); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -379,7 +341,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, TD, AA); + EverChanged = iterativelySimplifyCFG(F, TTI, TD); EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 28c08d5..e17a416 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -665,3 +665,104 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); return CheckTerm; } + +/// GetIfCondition - Given a basic block (BB) with two predecessors, +/// check to see if the merge at this block is due +/// to an "if condition". If so, return the boolean condition that determines +/// which entry into BB will be taken. Also, return by references the block +/// that will be entered from if the condition is true, and the block that will +/// be entered if the condition is false. +/// +/// This does no checking to see if the true/false blocks have large or unsavory +/// instructions in them. +Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, + BasicBlock *&IfFalse) { + PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); + BasicBlock *Pred1 = NULL; + BasicBlock *Pred2 = NULL; + + if (SomePHI) { + if (SomePHI->getNumIncomingValues() != 2) + return NULL; + Pred1 = SomePHI->getIncomingBlock(0); + Pred2 = SomePHI->getIncomingBlock(1); + } else { + pred_iterator PI = pred_begin(BB), PE = pred_end(BB); + if (PI == PE) // No predecessor + return NULL; + Pred1 = *PI++; + if (PI == PE) // Only one predecessor + return NULL; + Pred2 = *PI++; + if (PI != PE) // More than two predecessors + return NULL; + } + + // We can only handle branches. Other control flow will be lowered to + // branches if possible anyway. + BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); + BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); + if (Pred1Br == 0 || Pred2Br == 0) + return 0; + + // Eliminate code duplication by ensuring that Pred1Br is conditional if + // either are. + if (Pred2Br->isConditional()) { + // If both branches are conditional, we don't have an "if statement". In + // reality, we could transform this case, but since the condition will be + // required anyway, we stand no chance of eliminating it, so the xform is + // probably not profitable. + if (Pred1Br->isConditional()) + return 0; + + std::swap(Pred1, Pred2); + std::swap(Pred1Br, Pred2Br); + } + + if (Pred1Br->isConditional()) { + // The only thing we have to watch out for here is to make sure that Pred2 + // doesn't have incoming edges from other blocks. If it does, the condition + // doesn't dominate BB. + if (Pred2->getSinglePredecessor() == 0) + return 0; + + // If we found a conditional branch predecessor, make sure that it branches + // to BB and Pred2Br. If it doesn't, this isn't an "if statement". + if (Pred1Br->getSuccessor(0) == BB && + Pred1Br->getSuccessor(1) == Pred2) { + IfTrue = Pred1; + IfFalse = Pred2; + } else if (Pred1Br->getSuccessor(0) == Pred2 && + Pred1Br->getSuccessor(1) == BB) { + IfTrue = Pred2; + IfFalse = Pred1; + } else { + // We know that one arm of the conditional goes to BB, so the other must + // go somewhere unrelated, and this must not be an "if statement". + return 0; + } + + return Pred1Br->getCondition(); + } + + // Ok, if we got here, both predecessors end with an unconditional branch to + // BB. Don't panic! If both blocks only have a single (identical) + // predecessor, and THAT is a conditional branch, then we're all ok! + BasicBlock *CommonPred = Pred1->getSinglePredecessor(); + if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) + return 0; + + // Otherwise, if this is a conditional branch, then we can use it! + BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); + if (BI == 0) return 0; + + assert(BI->isConditional() && "Two successors but not conditional?"); + if (BI->getSuccessor(0) == Pred1) { + IfTrue = Pred1; + IfFalse = Pred2; + } else { + IfTrue = Pred2; + IfFalse = Pred1; + } + return BI->getCondition(); +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 470cf34..3648fd6 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_library(LLVMTransformUtils PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp + FlattenCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp new file mode 100644 index 0000000..656e897 --- /dev/null +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -0,0 +1,487 @@ +//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reduce conditional branches in CFG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "flattencfg" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +namespace { +class FlattenCFGOpt { + AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for + /// conditional branches. + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief If \param BB is the merge block of an if-region, attempt to merge + /// the if-region with an adjacent if-region upstream if two if-regions + /// contain identical instructions. + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which + /// are from two if-regions whose entry blocks are \p Head1 and \p + /// Head2. \returns true if \p Block1 and \p Block2 contain identical + /// instructions, and have no memory reference alias with \p Head2. + /// This is used as a legality check for merging if-regions. + bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, BasicBlock *Block2); + +public: + FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); +}; +} + +/// If \param [in] BB has more than one predecessor that is a conditional +/// branch, attempt to use parallel and/or for the branch condition. \returns +/// true on success. +/// +/// Before: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// br i1 %cmp1, label %if.then, label %lor.rhs +/// +/// lor.rhs: +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// br i1 %cmp11, label %if.then, label %ifend +/// +/// if.end: // the merge block +/// ...... +/// +/// if.then: // has two predecessors, both of them contains conditional branch. +/// ...... +/// br label %if.end; +/// +/// After: +/// ...... +/// %cmp10 = fcmp une float %tmp1, %tmp2 +/// ...... +/// %cmp11 = fcmp une float %tmp3, %tmp4 +/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. +/// br i1 %cmp12, label %if.then, label %ifend +/// +/// if.end: +/// ...... +/// +/// if.then: +/// ...... +/// br label %if.end; +/// +/// Current implementation handles two cases. +/// Case 1: \param BB is on the else-path. +/// +/// BB1 +/// / | +/// BB2 | +/// / \ | +/// BB3 \ | where, BB1, BB2 contain conditional branches. +/// \ | / BB3 contains unconditional branch. +/// \ | / BB4 corresponds to \param BB which is also the merge. +/// BB => BB4 +/// +/// +/// Corresponding source code: +/// +/// if (a == b && c == d) +/// statement; // BB3 +/// +/// Case 2: \param BB BB is on the then-path. +/// +/// BB1 +/// / | +/// | BB2 +/// \ / | where BB1, BB2 contain conditional branches. +/// BB => BB3 | BB3 contains unconditiona branch and corresponds +/// \ / to \param BB. BB4 is the merge. +/// BB4 +/// +/// Corresponding source code: +/// +/// if (a == b || c == d) +/// statement; // BB3 +/// +/// In both cases, \param BB is the common successor of conditional branches. +/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as +/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches +/// as its predecessors. +/// +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + PHINode *PHI = dyn_cast<PHINode>(BB->begin()); + if (PHI) + return false; // For simplicity, avoid cases containing PHI nodes. + + BasicBlock *LastCondBlock = NULL; + BasicBlock *FirstCondBlock = NULL; + BasicBlock *UnCondBlock = NULL; + int Idx = -1; + + // Check predecessors of \param BB. + SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB)); + for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end(); + PI != PE; ++PI) { + BasicBlock *Pred = *PI; + BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); + + // All predecessors should terminate with a branch. + if (!PBI) + return false; + + BasicBlock *PP = Pred->getSinglePredecessor(); + + if (PBI->isUnconditional()) { + // Case 1: Pred (BB3) is an unconditional block, it should + // have a single predecessor (BB2) that is also a predecessor + // of \param BB (BB4) and should not have address-taken. + // There should exist only one such unconditional + // branch among the predecessors. + if (UnCondBlock || !PP || (Preds.count(PP) == 0) || + Pred->hasAddressTaken()) + return false; + + UnCondBlock = Pred; + continue; + } + + // Only conditional branches are allowed beyond this point. + assert(PBI->isConditional()); + + // Condition's unique use should be the branch instruction. + Value *PC = PBI->getCondition(); + if (!PC || !PC->hasOneUse()) + return false; + + if (PP && Preds.count(PP)) { + // These are internal condition blocks to be merged from, e.g., + // BB2 in both cases. + // Should not be address-taken. + if (Pred->hasAddressTaken()) + return false; + + // Instructions in the internal condition blocks should be safe + // to hoist up. + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { + Instruction *CI = BI++; + if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI)) + return false; + } + } else { + // This is the condition block to be merged into, e.g. BB1 in + // both cases. + if (FirstCondBlock) + return false; + FirstCondBlock = Pred; + } + + // Find whether BB is uniformly on the true (or false) path + // for all of its predecessors. + BasicBlock *PS1 = PBI->getSuccessor(0); + BasicBlock *PS2 = PBI->getSuccessor(1); + BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; + int CIdx = (PS1 == BB) ? 0 : 1; + + if (Idx == -1) + Idx = CIdx; + else if (CIdx != Idx) + return false; + + // PS is the successor which is not BB. Check successors to identify + // the last conditional branch. + if (Preds.count(PS) == 0) { + // Case 2. + LastCondBlock = Pred; + } else { + // Case 1 + BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); + if (BPS && BPS->isUnconditional()) { + // Case 1: PS(BB3) should be an unconditional branch. + LastCondBlock = Pred; + } + } + } + + if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) + return false; + + TerminatorInst *TBB = LastCondBlock->getTerminator(); + BasicBlock *PS1 = TBB->getSuccessor(0); + BasicBlock *PS2 = TBB->getSuccessor(1); + BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator()); + BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator()); + + // If PS1 does not jump into PS2, but PS2 jumps into PS1, + // attempt branch inversion. + if (!PBI1 || !PBI1->isUnconditional() || + (PS1->getTerminator()->getSuccessor(0) != PS2)) { + // Check whether PS2 jumps into PS1. + if (!PBI2 || !PBI2->isUnconditional() || + (PS2->getTerminator()->getSuccessor(0) != PS1)) + return false; + + // Do branch inversion. + BasicBlock *CurrBlock = LastCondBlock; + bool EverChanged = false; + while (1) { + BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator()); + CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition()); + CmpInst::Predicate Predicate = CI->getPredicate(); + // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq + if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) { + CI->setPredicate(ICmpInst::getInversePredicate(Predicate)); + BI->swapSuccessors(); + EverChanged = true; + } + if (CurrBlock == FirstCondBlock) + break; + CurrBlock = CurrBlock->getSinglePredecessor(); + } + return EverChanged; + } + + // PS1 must have a conditional branch. + if (!PBI1 || !PBI1->isUnconditional()) + return false; + + // PS2 should not contain PHI node. + PHI = dyn_cast<PHINode>(PS2->begin()); + if (PHI) + return false; + + // Do the transformation. + BasicBlock *CB; + BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); + bool Iteration = true; + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Value *PC = PBI->getCondition(); + + do { + CB = PBI->getSuccessor(1 - Idx); + // Delete the conditional branch. + FirstCondBlock->getInstList().pop_back(); + FirstCondBlock->getInstList() + .splice(FirstCondBlock->end(), CB->getInstList()); + PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); + Value *CC = PBI->getCondition(); + // Merge conditions. + Builder.SetInsertPoint(PBI); + Value *NC; + if (Idx == 0) + // Case 2, use parallel or. + NC = Builder.CreateOr(PC, CC); + else + // Case 1, use parallel and. + NC = Builder.CreateAnd(PC, CC); + + PBI->replaceUsesOfWith(CC, NC); + PC = NC; + if (CB == LastCondBlock) + Iteration = false; + // Remove internal conditional branches. + CB->dropAllReferences(); + // make CB unreachable and let downstream to delete the block. + new UnreachableInst(CB->getContext(), CB); + } while (Iteration); + + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); + return true; +} + +/// Compare blocks from two if-regions, where \param Head1 is the entry of the +/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param +/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block +// in the 2nd if-region to compare. \returns true if \param Block1 and \param +/// Block2 have identical instructions and do not have memory reference alias +/// with \param Head2. +/// +bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, + BasicBlock *Block1, + BasicBlock *Block2) { + TerminatorInst *PTI2 = Head2->getTerminator(); + Instruction *PBI2 = Head2->begin(); + + bool eq1 = (Block1 == Head1); + bool eq2 = (Block2 == Head2); + if (eq1 || eq2) { + // An empty then-path or else-path. + return (eq1 == eq2); + } + + // Check whether instructions in Block1 and Block2 are identical + // and do not alias with instructions in Head2. + BasicBlock::iterator iter1 = Block1->begin(); + BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator iter2 = Block2->begin(); + BasicBlock::iterator end2 = Block2->getTerminator(); + + while (1) { + if (iter1 == end1) { + if (iter2 != end2) + return false; + break; + } + + if (!iter1->isIdenticalTo(iter2)) + return false; + + // Illegal to remove instructions with side effects except + // non-volatile stores. + if (iter1->mayHaveSideEffects()) { + Instruction *CurI = &*iter1; + StoreInst *SI = dyn_cast<StoreInst>(CurI); + if (!SI || SI->isVolatile()) + return false; + } + + // For simplicity and speed, data dependency check can be + // avoided if read from memory doesn't exist. + if (iter1->mayReadFromMemory()) + return false; + + if (iter1->mayWriteToMemory()) { + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { + // Check alias with Head2. + if (!AA || AA->alias(iter1, BI)) + return false; + } + } + } + ++iter1; + ++iter2; + } + + return true; +} + +/// Check whether \param BB is the merge block of a if-region. If yes, check +/// whether there exists an adjacent if-region upstream, the two if-regions +/// contain identical instuctions and can be legally merged. \returns true if +/// the two if-regions are merged. +/// +/// From: +/// if (a) +/// statement; +/// if (b) +/// statement; +/// +/// To: +/// if (a || b) +/// statement; +/// +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P) { + BasicBlock *IfTrue2, *IfFalse2; + Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); + Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); + if (!CInst2) + return false; + + BasicBlock *SecondEntryBlock = CInst2->getParent(); + if (SecondEntryBlock->hasAddressTaken()) + return false; + + BasicBlock *IfTrue1, *IfFalse1; + Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); + Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); + if (!CInst1) + return false; + + BasicBlock *FirstEntryBlock = CInst1->getParent(); + + // Either then-path or else-path should be empty. + if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) + return false; + if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) + return false; + + TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); + Instruction *PBI2 = SecondEntryBlock->begin(); + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, + IfTrue2)) + return false; + + if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, + IfFalse2)) + return false; + + // Check whether \param SecondEntryBlock has side-effect and is safe to + // speculate. + for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + Instruction *CI = BI; + if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || + !isSafeToSpeculativelyExecute(CI)) + return false; + } + + // Merge \param SecondEntryBlock into \param FirstEntryBlock. + FirstEntryBlock->getInstList().pop_back(); + FirstEntryBlock->getInstList() + .splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); + BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); + Value *CC = PBI->getCondition(); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + Builder.SetInsertPoint(PBI); + Value *NC = Builder.CreateOr(CInst1, CC); + PBI->replaceUsesOfWith(CC, NC); + Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remove IfTrue1 + if (IfTrue1 != FirstEntryBlock) { + IfTrue1->dropAllReferences(); + IfTrue1->eraseFromParent(); + } + + // Remove IfFalse1 + if (IfFalse1 != FirstEntryBlock) { + IfFalse1->dropAllReferences(); + IfFalse1->eraseFromParent(); + } + + // Remove \param SecondEntryBlock + SecondEntryBlock->dropAllReferences(); + SecondEntryBlock->eraseFromParent(); + DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); + return true; +} + +bool FlattenCFGOpt::run(BasicBlock *BB) { + bool Changed = false; + assert(BB && BB->getParent() && "Block not embedded in function!"); + assert(BB->getTerminator() && "Degenerate basic block encountered!"); + + IRBuilder<> Builder(BB); + + if (FlattenParallelAndOr(BB, Builder)) + return true; + + if (MergeIfRegion(BB, Builder)) + return true; + + return Changed; +} + +/// FlattenCFG - This function is used to flatten a CFG. For +/// example, it uses parallel-and and parallel-or mode to collapse +// if-conditions and merge if-regions with identical statements. +/// +bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { + return FlattenCFGOpt(AA).run(BB); +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d48213f..c4c1423 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -66,10 +65,6 @@ static cl::opt<bool> HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store preceeds")); -static cl::opt<bool> -ParallelAndOr("simplifycfg-parallel-and-or", cl::Hidden, cl::init(true), - cl::desc("Use parallel-and-or mode for branch conditions")); - STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); @@ -95,8 +90,6 @@ namespace { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout *const TD; - AliasAnalysis *AA; - Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases); @@ -113,25 +106,10 @@ class SimplifyCFGOpt { bool SimplifyIndirectBr(IndirectBrInst *IBI); bool SimplifyUncondBranch(BranchInst *BI, IRBuilder <> &Builder); bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); - /// \brief Use parallel-and or parallel-or to generate conditions for - /// conditional branches. - bool SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); - /// \brief If \param BB is the merge block of an if-region, attempt to merge - /// the if-region with an adjacent if-region upstream if two if-regions - /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); - /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which - /// are from two if-regions whose entry blocks are \p Head1 and \p - /// Head2. \returns true if \p Block1 and \p Block2 contain identical - /// instructions, and have no memory reference alias with \p Head2. - /// This is used as a legality check for merging if-regions. - bool CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, - BasicBlock *Block1, BasicBlock *Block2); public: - SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD, - AliasAnalysis *AA) - : TTI(TTI), TD(TD), AA(AA) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) + : TTI(TTI), TD(TD) {} bool run(BasicBlock *BB); }; } @@ -217,108 +195,6 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); } - -/// GetIfCondition - Given a basic block (BB) with two predecessors, -/// check to see if the merge at this block is due -/// to an "if condition". If so, return the boolean condition that determines -/// which entry into BB will be taken. Also, return by references the block -/// that will be entered from if the condition is true, and the block that will -/// be entered if the condition is false. -/// -/// This does no checking to see if the true/false blocks have large or unsavory -/// instructions in them. -static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, - BasicBlock *&IfFalse) { - PHINode *SomePHI = dyn_cast<PHINode>(BB->begin()); - BasicBlock *Pred1 = NULL; - BasicBlock *Pred2 = NULL; - - if (SomePHI) { - if (SomePHI->getNumIncomingValues() != 2) - return NULL; - Pred1 = SomePHI->getIncomingBlock(0); - Pred2 = SomePHI->getIncomingBlock(1); - } else { - pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (PI == PE) // No predecessor - return NULL; - Pred1 = *PI++; - if (PI == PE) // Only one predecessor - return NULL; - Pred2 = *PI++; - if (PI != PE) // More than two predecessors - return NULL; - } - - // We can only handle branches. Other control flow will be lowered to - // branches if possible anyway. - BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator()); - BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; - - // Eliminate code duplication by ensuring that Pred1Br is conditional if - // either are. - if (Pred2Br->isConditional()) { - // If both branches are conditional, we don't have an "if statement". In - // reality, we could transform this case, but since the condition will be - // required anyway, we stand no chance of eliminating it, so the xform is - // probably not profitable. - if (Pred1Br->isConditional()) - return 0; - - std::swap(Pred1, Pred2); - std::swap(Pred1Br, Pred2Br); - } - - if (Pred1Br->isConditional()) { - // The only thing we have to watch out for here is to make sure that Pred2 - // doesn't have incoming edges from other blocks. If it does, the condition - // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; - - // If we found a conditional branch predecessor, make sure that it branches - // to BB and Pred2Br. If it doesn't, this isn't an "if statement". - if (Pred1Br->getSuccessor(0) == BB && - Pred1Br->getSuccessor(1) == Pred2) { - IfTrue = Pred1; - IfFalse = Pred2; - } else if (Pred1Br->getSuccessor(0) == Pred2 && - Pred1Br->getSuccessor(1) == BB) { - IfTrue = Pred2; - IfFalse = Pred1; - } else { - // We know that one arm of the conditional goes to BB, so the other must - // go somewhere unrelated, and this must not be an "if statement". - return 0; - } - - return Pred1Br->getCondition(); - } - - // Ok, if we got here, both predecessors end with an unconditional branch to - // BB. Don't panic! If both blocks only have a single (identical) - // predecessor, and THAT is a conditional branch, then we're all ok! - BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; - - // Otherwise, if this is a conditional branch, then we can use it! - BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); - if (BI == 0) return 0; - - assert(BI->isConditional() && "Two successors but not conditional?"); - if (BI->getSuccessor(0) == Pred1) { - IfTrue = Pred1; - IfFalse = Pred2; - } else { - IfTrue = Pred2; - IfFalse = Pred1; - } - return BI->getCondition(); -} - /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. @@ -4102,386 +3978,6 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return false; } -/// If \param [in] BB has more than one predecessor that is a conditional -/// branch, attempt to use parallel and/or for the branch condition. \returns -/// true on success. -/// -/// Before: -/// ...... -/// %cmp10 = fcmp une float %tmp1, %tmp2 -/// br i1 %cmp1, label %if.then, label %lor.rhs -/// -/// lor.rhs: -/// ...... -/// %cmp11 = fcmp une float %tmp3, %tmp4 -/// br i1 %cmp11, label %if.then, label %ifend -/// -/// if.end: // the merge block -/// ...... -/// -/// if.then: // has two predecessors, both of them contains conditional branch. -/// ...... -/// br label %if.end; -/// -/// After: -/// ...... -/// %cmp10 = fcmp une float %tmp1, %tmp2 -/// ...... -/// %cmp11 = fcmp une float %tmp3, %tmp4 -/// %cmp12 = or i1 %cmp10, %cmp11 // parallel-or mode. -/// br i1 %cmp12, label %if.then, label %ifend -/// -/// if.end: -/// ...... -/// -/// if.then: -/// ...... -/// br label %if.end; -/// -/// Current implementation handles two cases. -/// Case 1: \param BB is on the else-path. -/// -/// BB1 -/// / | -/// BB2 | -/// / \ | -/// BB3 \ | where, BB1, BB2 contain conditional branches. -/// \ | / BB3 contains unconditional branch. -/// \ | / BB4 corresponds to \param BB which is also the merge. -/// BB => BB4 -/// -/// -/// Corresponding source code: -/// -/// if (a == b && c == d) -/// statement; // BB3 -/// -/// Case 2: \param BB BB is on the then-path. -/// -/// BB1 -/// / | -/// | BB2 -/// \ / | where BB1, BB2 contain conditional branches. -/// BB => BB3 | BB3 contains unconditiona branch and corresponds -/// \ / to \param BB. BB4 is the merge. -/// BB4 -/// -/// Corresponding source code: -/// -/// if (a == b || c == d) -/// statement; // BB3 -/// -/// In both cases, \param BB is the common successor of conditional branches. -/// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as -/// its predecessor. In Case 2, \param BB (BB3) only has conditional branches -/// as its predecessors. -/// -bool SimplifyCFGOpt::SimplifyParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { - PHINode *PHI = dyn_cast<PHINode>(BB->begin()); - if (PHI) - return false; // For simplicity, avoid cases containing PHI nodes. - - BasicBlock *LastCondBlock = NULL; - BasicBlock *FirstCondBlock = NULL; - BasicBlock *UnCondBlock = NULL; - int Idx = -1; - - // Check predecessors of \param BB. - SmallPtrSet<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); - for (SmallPtrSetIterator<BasicBlock*> PI = Preds.begin(), PE = Preds.end(); - PI != PE; ++PI) { - BasicBlock *Pred = *PI; - BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()); - - // All predecessors should terminate with a branch. - if (!PBI) - return false; - - BasicBlock *PP = Pred->getSinglePredecessor(); - - if (PBI->isUnconditional()) { - // Case 1: Pred (BB3) is an unconditional block, it should - // have a single predecessor (BB2) that is also a predecessor - // of \param BB (BB4) and should not have address-taken. - // There should exist only one such unconditional - // branch among the predecessors. - if (UnCondBlock || !PP || (Preds.count(PP) == 0) || - Pred->hasAddressTaken()) - return false; - - UnCondBlock = Pred; - continue; - } - - // Only conditional branches are allowed beyond this point. - assert(PBI->isConditional()); - - // Condition's unique use should be the branch instruction. - Value *PC = PBI->getCondition(); - if (!PC || !PC->hasOneUse()) - return false; - - if (PP && Preds.count(PP)) { - // These are internal condition blocks to be merged from, e.g., - // BB2 in both cases. - // Should not be address-taken. - if (Pred->hasAddressTaken()) - return false; - - // Instructions in the internal condition blocks should be safe - // to hoist up. - for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { - Instruction *CI = BI++; - if (isa<PHINode>(CI) || - !isSafeToSpeculativelyExecute(CI)) - return false; - } - } else { - // This is the condition block to be merged into, e.g. BB1 in - // both cases. - if (FirstCondBlock) - return false; - FirstCondBlock = Pred; - } - - // Find whether BB is uniformly on the true (or false) path - // for all of its predecessors. - BasicBlock *PS1 = PBI->getSuccessor(0); - BasicBlock *PS2 = PBI->getSuccessor(1); - BasicBlock *PS = (PS1 == BB) ? PS2 : PS1; - int CIdx = (PS1 == BB) ? 0 : 1; - - if (Idx == -1) - Idx = CIdx; - else if (CIdx != Idx) - return false; - - // PS is the successor which is not BB. Check successors to identify - // the last conditional branch. - if (Preds.count(PS) == 0) { - // Case 2. - // BB must have an unique successor. - TerminatorInst *TBB = BB->getTerminator(); - if (TBB->getNumSuccessors() != 1) - return false; - - BasicBlock *SBB = TBB->getSuccessor(0); - PHI = dyn_cast<PHINode>(SBB->begin()); - if (PHI) - return false; - - // PS (BB4) should be BB's successor. - if (SBB != PS) - return false; - LastCondBlock = Pred; - } else { - BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator()); - if (BPS && BPS->isUnconditional()) { - // Case 1: PS(BB3) should be an unconditional branch. - LastCondBlock = Pred; - } - } - } - - if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock)) - return false; - - // Do the transformation. - BasicBlock *CB; - bool Iteration = true; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator()); - Value *PC = PBI->getCondition(); - do { - CB = PBI->getSuccessor(1 - Idx); - // Delete the conditional branch. - FirstCondBlock->getInstList().pop_back(); - FirstCondBlock->getInstList().splice(FirstCondBlock->end(), CB->getInstList()); - PBI = cast<BranchInst>(FirstCondBlock->getTerminator()); - Value *CC = PBI->getCondition(); - // Merge conditions. - Builder.SetInsertPoint(PBI); - Value *NC; - if (Idx == 0) - // Case 2, use parallel or. - NC = Builder.CreateOr(PC, CC); - else - // Case 1, use parallel and. - NC = Builder.CreateAnd(PC, CC); - - PBI->replaceUsesOfWith(CC, NC); - PC = NC; - if (CB == LastCondBlock) - Iteration = false; - // Remove internal conditional branches. - CB->dropAllReferences(); - // make CB unreachable and let downstream to delete the block. - new UnreachableInst(CB->getContext(), CB); - } while (Iteration); - if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); - DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); - return true; -} - -/// Compare blocks from two if-regions, where \param Head1 is the entry of the -/// 1st if-region. \param Head2 is the entry of the 2nd if-region. \param -/// Block1 is a block in the 1st if-region to compare. \param Block2 is a block -// in the 2nd if-region to compare. \returns true if \param Block1 and \param -/// Block2 have identical instructions and do not have memory reference alias -/// with \param Head2. -/// -bool SimplifyCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, - BasicBlock *Block1, BasicBlock *Block2) { - TerminatorInst *PTI2 = Head2->getTerminator(); - Instruction *PBI2 = Head2->begin(); - - bool eq1 = (Block1 == Head1); - bool eq2 = (Block2 == Head2); - if (eq1 || eq2) { - // An empty then-path or else-path. - return (eq1 == eq2); - } - - // Check whether instructions in Block1 and Block2 are identical - // and do not alias with instructions in Head2. - BasicBlock::iterator iter1 = Block1->begin(); - BasicBlock::iterator end1 = Block1->getTerminator(); - BasicBlock::iterator iter2 = Block2->begin(); - BasicBlock::iterator end2 = Block2->getTerminator(); - - while (1) { - if (iter1 == end1) { - if (iter2 != end2) - return false; - break; - } - - if (!iter1->isIdenticalTo(iter2)) - return false; - - // Illegal to remove instructions with side effects except - // non-volatile stores. - if (iter1->mayHaveSideEffects()) { - Instruction *CurI = &*iter1; - StoreInst *SI = dyn_cast<StoreInst>(CurI); - if (!SI || SI->isVolatile()) - return false; - } - - // For simplicity and speed, data dependency check can be - // avoided if read from memory doesn't exist. - if (iter1->mayReadFromMemory()) - return false; - - if (iter1->mayWriteToMemory()) { - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { - if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { - // Check alias with Head2. - if (!AA || AA->alias(iter1, BI)) - return false; - } - } - } - ++iter1; - ++iter2; - } - - return true; -} - -/// Check whether \param BB is the merge block of a if-region. If yes, check -/// whether there exists an adjacent if-region upstream, the two if-regions -/// contain identical instuctions and can be legally merged. \returns true if -/// the two if-regions are merged. -/// -/// From: -/// if (a) -/// statement; -/// if (b) -/// statement; -/// -/// To: -/// if (a || b) -/// statement; -/// -bool SimplifyCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { - BasicBlock *IfTrue2, *IfFalse2; - Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); - Instruction *CInst2 = dyn_cast_or_null<Instruction>(IfCond2); - if (!CInst2) - return false; - - BasicBlock *SecondEntryBlock = CInst2->getParent(); - if (SecondEntryBlock->hasAddressTaken()) - return false; - - BasicBlock *IfTrue1, *IfFalse1; - Value *IfCond1 = GetIfCondition(SecondEntryBlock, IfTrue1, IfFalse1); - Instruction *CInst1 = dyn_cast_or_null<Instruction>(IfCond1); - if (!CInst1) - return false; - - BasicBlock *FirstEntryBlock = CInst1->getParent(); - - // Either then-path or else-path should be empty. - if ((IfTrue1 != FirstEntryBlock) && (IfFalse1 != FirstEntryBlock)) - return false; - if ((IfTrue2 != SecondEntryBlock) && (IfFalse2 != SecondEntryBlock)) - return false; - - TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); - Instruction *PBI2 = SecondEntryBlock->begin(); - - if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2)) - return false; - - if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfFalse1, IfFalse2)) - return false; - - // Check whether \param SecondEntryBlock has side-effect and is safe to speculate. - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { - Instruction *CI = BI; - if (isa<PHINode>(CI) || CI->mayHaveSideEffects() || - !isSafeToSpeculativelyExecute(CI)) - return false; - } - - // Merge \param SecondEntryBlock into \param FirstEntryBlock. - FirstEntryBlock->getInstList().pop_back(); - FirstEntryBlock->getInstList().splice(FirstEntryBlock->end(), SecondEntryBlock->getInstList()); - BranchInst *PBI = dyn_cast<BranchInst>(FirstEntryBlock->getTerminator()); - Value *CC = PBI->getCondition(); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Builder.SetInsertPoint(PBI); - Value *NC = Builder.CreateOr(CInst1, CC); - PBI->replaceUsesOfWith(CC, NC); - if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); - - // Remove IfTrue1 - if (IfTrue1 != FirstEntryBlock) { - IfTrue1->dropAllReferences(); - IfTrue1->eraseFromParent(); - } - - // Remove IfFalse1 - if (IfFalse1 != FirstEntryBlock) { - IfFalse1->dropAllReferences(); - IfFalse1->eraseFromParent(); - } - - // Remove \param SecondEntryBlock - SecondEntryBlock->dropAllReferences(); - SecondEntryBlock->eraseFromParent(); - DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); - return true; -} - /// Check if passing a value to an instruction will cause undefined behavior. static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { Constant *C = dyn_cast<Constant>(V); @@ -4584,11 +4080,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { return true; IRBuilder<> Builder(BB); - // Whether to optimize conditional branches. - bool OptCB = (ParallelAndOr && AA && TTI.hasBranchDivergence()); - - if (OptCB && SimplifyParallelAndOr(BB, Builder)) - return true; // If there is a trivial two-entry PHI node in this basic block, and we can // eliminate it, do so now. @@ -4617,9 +4108,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyIndirectBr(IBI)) return true; } - if (OptCB && MergeIfRegion(BB, Builder)) - return true; - return Changed; } @@ -4629,6 +4117,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// of the CFG. It returns true if a modification was made. /// bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - const DataLayout *TD, AliasAnalysis *AA) { - return SimplifyCFGOpt(TTI, TD, AA).run(BB); + const DataLayout *TD) { + return SimplifyCFGOpt(TTI, TD).run(BB); } diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll new file mode 100644 index 0000000..4afaf68 --- /dev/null +++ b/test/CodeGen/R600/parallelandifcollapse.ll @@ -0,0 +1,54 @@ +; Function Attrs: nounwind +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; +; CFG flattening should use parallel-and mode to generate branch conditions and +; then merge if-regions with the same bodies. +; +; CHECK: AND_INT +; CHECK-NEXT: AND_INT +; CHECK-NEXT: OR_INT +define void @_Z9chk1D_512v() #0 { +entry: + %a0 = alloca i32, align 4 + %b0 = alloca i32, align 4 + %c0 = alloca i32, align 4 + %d0 = alloca i32, align 4 + %a1 = alloca i32, align 4 + %b1 = alloca i32, align 4 + %c1 = alloca i32, align 4 + %d1 = alloca i32, align 4 + %data = alloca i32, align 4 + %0 = load i32* %a0, align 4 + %1 = load i32* %b0, align 4 + %cmp = icmp ne i32 %0, %1 + br i1 %cmp, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + %2 = load i32* %c0, align 4 + %3 = load i32* %d0, align 4 + %cmp1 = icmp ne i32 %2, %3 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %land.lhs.true + store i32 1, i32* %data, align 4 + br label %if.end + +if.end: ; preds = %if.then, %land.lhs.true, %entry + %4 = load i32* %a1, align 4 + %5 = load i32* %b1, align 4 + %cmp2 = icmp ne i32 %4, %5 + br i1 %cmp2, label %land.lhs.true3, label %if.end6 + +land.lhs.true3: ; preds = %if.end + %6 = load i32* %c1, align 4 + %7 = load i32* %d1, align 4 + %cmp4 = icmp ne i32 %6, %7 + br i1 %cmp4, label %if.then5, label %if.end6 + +if.then5: ; preds = %land.lhs.true3 + store i32 1, i32* %data, align 4 + br label %if.end6 + +if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end + ret void +} diff --git a/test/CodeGen/R600/parallelorifcollapse.ll b/test/CodeGen/R600/parallelorifcollapse.ll new file mode 100644 index 0000000..b0db7cd --- /dev/null +++ b/test/CodeGen/R600/parallelorifcollapse.ll @@ -0,0 +1,61 @@ +; Function Attrs: nounwind +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s +; +; CFG flattening should use parallel-or to generate branch conditions and +; then merge if-regions with the same bodies. +; +; CHECK: OR_INT +; CHECK-NEXT: OR_INT +; CHECK-NEXT: OR_INT +define void @_Z9chk1D_512v() #0 { +entry: + %a0 = alloca i32, align 4 + %b0 = alloca i32, align 4 + %c0 = alloca i32, align 4 + %d0 = alloca i32, align 4 + %a1 = alloca i32, align 4 + %b1 = alloca i32, align 4 + %c1 = alloca i32, align 4 + %d1 = alloca i32, align 4 + %data = alloca i32, align 4 + %0 = load i32* %a0, align 4 + %1 = load i32* %b0, align 4 + %cmp = icmp ne i32 %0, %1 + br i1 %cmp, label %land.lhs.true, label %if.else + +land.lhs.true: ; preds = %entry + %2 = load i32* %c0, align 4 + %3 = load i32* %d0, align 4 + %cmp1 = icmp ne i32 %2, %3 + br i1 %cmp1, label %if.then, label %if.else + +if.then: ; preds = %land.lhs.true + br label %if.end + +if.else: ; preds = %land.lhs.true, %entry + store i32 1, i32* %data, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %4 = load i32* %a1, align 4 + %5 = load i32* %b1, align 4 + %cmp2 = icmp ne i32 %4, %5 + br i1 %cmp2, label %land.lhs.true3, label %if.else6 + +land.lhs.true3: ; preds = %if.end + %6 = load i32* %c1, align 4 + %7 = load i32* %d1, align 4 + %cmp4 = icmp ne i32 %6, %7 + br i1 %cmp4, label %if.then5, label %if.else6 + +if.then5: ; preds = %land.lhs.true3 + br label %if.end7 + +if.else6: ; preds = %land.lhs.true3, %if.end + store i32 1, i32* %data, align 4 + br label %if.end7 + +if.end7: ; preds = %if.else6, %if.then5 + ret void +} + diff --git a/test/Transforms/SimplifyCFG/R600/lit.local.cfg b/test/Transforms/SimplifyCFG/R600/lit.local.cfg index 4f6e579..e69de29 100644 --- a/test/Transforms/SimplifyCFG/R600/lit.local.cfg +++ b/test/Transforms/SimplifyCFG/R600/lit.local.cfg @@ -1,6 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'R600' in targets: - config.unsupported = True - diff --git a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll index 053921c..e69de29 100644 --- a/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll +++ b/test/Transforms/SimplifyCFG/R600/parallelandifcollapse.ll @@ -1,63 +0,0 @@ -; Function Attrs: nounwind -; RUN: opt < %s -mtriple=r600-unknown-linux-gnu -optimizecfg -basicaa -S | FileCheck %s -; -; CFG optimization should use parallel-and mode to generate branch conditions and -; then merge if-regions with the same bodies, which should result in 2 branches. -; To see the assembly output without this transformation, remove -basicaa option. -; -; CHECK: or i1 -; CHECK-NEXT: br -; CHECK: br -; CHECK: ret -define void @_Z9chk1D_512v() #0 { -entry: - %a0 = alloca i32, align 4 - %b0 = alloca i32, align 4 - %c0 = alloca i32, align 4 - %d0 = alloca i32, align 4 - %a1 = alloca i32, align 4 - %b1 = alloca i32, align 4 - %c1 = alloca i32, align 4 - %d1 = alloca i32, align 4 - %data = alloca i32, align 4 - %0 = load i32* %a0, align 4 - %1 = load i32* %b0, align 4 - %cmp = icmp ne i32 %0, %1 - br i1 %cmp, label %land.lhs.true, label %if.else - -land.lhs.true: ; preds = %entry - %2 = load i32* %c0, align 4 - %3 = load i32* %d0, align 4 - %cmp1 = icmp ne i32 %2, %3 - br i1 %cmp1, label %if.then, label %if.else - -if.then: ; preds = %land.lhs.true - br label %if.end - -if.else: ; preds = %land.lhs.true, %entry - store i32 1, i32* %data, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then - %4 = load i32* %a1, align 4 - %5 = load i32* %b1, align 4 - %cmp2 = icmp ne i32 %4, %5 - br i1 %cmp2, label %land.lhs.true3, label %if.else6 - -land.lhs.true3: ; preds = %if.end - %6 = load i32* %c1, align 4 - %7 = load i32* %d1, align 4 - %cmp4 = icmp ne i32 %6, %7 - br i1 %cmp4, label %if.then5, label %if.else6 - -if.then5: ; preds = %land.lhs.true3 - br label %if.end7 - -if.else6: ; preds = %land.lhs.true3, %if.end - store i32 1, i32* %data, align 4 - br label %if.end7 - -if.end7: ; preds = %if.else6, %if.then5 - ret void -} - diff --git a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll b/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll index e1bb5fc..e69de29 100644 --- a/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll +++ b/test/Transforms/SimplifyCFG/R600/parallelorifcollapse.ll @@ -1,56 +0,0 @@ -; Function Attrs: nounwind -; RUN: opt < %s -mtriple=r600-unknown-linux-gnu -optimizecfg -basicaa -S | FileCheck %s -; -; CFG optimization should use parallel-or mode to generate branch conditions and -; then merge if-regions with the same bodies, which should result in 2 branches. -; To see the assembly output without this transformation, remove -basicaa option. -; -; CHECK: or i1 -; CHECK-NEXT: br -; CHECK: br -; CHECK: ret -define void @_Z9chk1D_512v() #0 { -entry: - %a0 = alloca i32, align 4 - %b0 = alloca i32, align 4 - %c0 = alloca i32, align 4 - %d0 = alloca i32, align 4 - %a1 = alloca i32, align 4 - %b1 = alloca i32, align 4 - %c1 = alloca i32, align 4 - %d1 = alloca i32, align 4 - %data = alloca i32, align 4 - %0 = load i32* %a0, align 4 - %1 = load i32* %b0, align 4 - %cmp = icmp ne i32 %0, %1 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %2 = load i32* %c0, align 4 - %3 = load i32* %d0, align 4 - %cmp1 = icmp ne i32 %2, %3 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true - store i32 1, i32* %data, align 4 - br label %if.end - -if.end: ; preds = %if.then, %land.lhs.true, %entry - %4 = load i32* %a1, align 4 - %5 = load i32* %b1, align 4 - %cmp2 = icmp ne i32 %4, %5 - br i1 %cmp2, label %land.lhs.true3, label %if.end6 - -land.lhs.true3: ; preds = %if.end - %6 = load i32* %c1, align 4 - %7 = load i32* %d1, align 4 - %cmp4 = icmp ne i32 %6, %7 - br i1 %cmp4, label %if.then5, label %if.end6 - -if.then5: ; preds = %land.lhs.true3 - store i32 1, i32* %data, align 4 - br label %if.end6 - -if.end6: ; preds = %if.then5, %land.lhs.true3, %if.end - ret void -} diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg index 19eebc0..e69de29 100644 --- a/test/Transforms/SimplifyCFG/lit.local.cfg +++ b/test/Transforms/SimplifyCFG/lit.local.cfg @@ -1 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 776a5b9..6139ade 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -118,7 +118,7 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeGVNPass(R); initializeMemCpyOptPass(R); initializeDCEPass(R); - initializeCFGCanonicalizePass(R); + initializeCFGSimplifyPassPass(R); } bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { |