From 8e0d1c03ca7fd86e6879b4e37d0d7f0e982feef6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 29 Aug 2012 15:32:21 +0000 Subject: Make MemoryBuiltins aware of TargetLibraryInfo. This disables malloc-specific optimization when -fno-builtin (or -ffreestanding) is specified. This has been a problem for a long time but became more severe with the recent memory builtin improvements. Since the memory builtin functions are used everywhere, this required passing TLI in many places. This means that functions that now have an optional TLI argument, like RecursivelyDeleteTriviallyDeadInstructions, won't remove dead mallocs anymore if the TLI argument is missing. I've updated most passes to do the right thing. Fixes PR13694 and probably others. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BasicBlockUtils.cpp | 4 +-- lib/Transforms/Utils/Local.cpp | 36 ++++++++++++++++----------- lib/Transforms/Utils/SimplifyInstructions.cpp | 2 +- 3 files changed, 24 insertions(+), 18 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b93..75a7817 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead. Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -bool llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. 
SmallVector PHIs; @@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); return Changed; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bed7d72..0601433 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -52,7 +52,8 @@ using namespace llvm; /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. -bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have 
undefined behavior. Replace the unconditional branch with an @@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast(CI->getArgOperand(0))) return C->isNullValue() || isa(C); @@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector DeadInsts; @@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. 
if (Instruction *OpI = dyn_cast(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. -bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. 
-bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0..528e6a1 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration -- cgit v1.1 From 6b01438decfc1e2efa642bb80a546c534675c894 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 29 Aug 2012 21:46:36 +0000 Subject: whitespace git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162867 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 336 +++++++++++++++++------------------ 1 file changed, 168 insertions(+), 168 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7c..06a61a8 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -101,14 +101,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! 
- + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +118,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +135,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +170,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast(I)); ++I) @@ -222,7 +222,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +252,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! 
BranchInst *BI = dyn_cast(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +345,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -411,7 +411,7 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, const TargetData *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +420,21 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +442,11 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? 
Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +467,12 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +484,7 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -634,7 +634,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +652,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet DeadCases; @@ -673,7 +673,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -822,7 +822,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. 
- for (std::set::iterator I = + for (std::set::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); @@ -984,11 +984,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1056,7 +1056,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1112,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1162,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. 
for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1176,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast(*UI); if (U->getParent() != BB || isa(U)) return false; } - + // Looks ok, continue checking. } @@ -1194,31 +1194,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1227,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1244,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. 
Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1252,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1297,7 +1297,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1307,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1322,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. 
PN = dyn_cast(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1342,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa(PN->getIncomingValue(1)) || isa(IfCond))) return false; - + // If we all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1362,7 +1362,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1375,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1394,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast(BB->begin())) { // Change the PHI node into a select instruction. 
Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1420,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1447,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. 
if (PHINode *TVPN = dyn_cast_or_null(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1460,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1472,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1491,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1600,7 +1600,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa(Cond) && !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1623,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa(FrontIt)) ++FrontIt; } @@ -1631,13 +1631,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. 
if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ingore dbg intrinsics. while (isa(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1649,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1659,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1693,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. 
if (BonusInst) { // Collect the values used by the bonus inst @@ -1707,47 +1707,47 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa(NewCond)) { CmpInst *CI = cast(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +1756,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. 
Instruction *New = Cond->clone(); @@ -1764,9 +1764,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); @@ -1806,7 +1806,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +1814,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1921,7 +1921,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +1936,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +1945,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. 
bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +1969,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessors is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +1991,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2007,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. 
For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2046,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder Builder(PBI); @@ -2065,16 +2065,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. 
If so, insert a select to make @@ -2092,10 +2092,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2214,7 +2214,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, @@ -2228,17 +2228,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa(Pred->getTerminator())) return false; - + SwitchInst *SI = cast(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2246,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2254,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. 
return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2264,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2297,7 +2297,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2313,8 +2313,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2323,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2332,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply compared value, fail. 
if (CompVal == 0) return false; @@ -2344,21 +2344,21 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; - + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2372,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2392,14 +2392,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. 
for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2410,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2467,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector UncondBranchPreds; SmallVector CondBranchPreds; @@ -2481,7 +2481,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2490,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. 
if (isa(BI->getSuccessor(0)->getTerminator()) && isa(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2516,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2558,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2615,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2627,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2648,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. 
BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2663,7 +2663,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2868,7 +2868,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +2879,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +2887,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +2904,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + // If the Terminator is the only non-phi instruction, simplify the block. 
BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast(I)) @@ -2921,7 +2921,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +2934,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +2943,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. BasicBlock::iterator I = BB->begin(); @@ -2962,17 +2962,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +2999,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) @@ -3114,7 +3114,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can -- cgit v1.1 From b1b97833aeaf8a7ef6dd3b314a502a1521b02657 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 29 Aug 2012 21:46:38 +0000 Subject: Preserve branch profile metadata during switch formation. Patch by Michael Ilseman! This fixes SimplifyCFGOpt::FoldValueComparisonIntoPredecessors to preserve metadata when folding conditional branches into switches.
void foo(int x) { if (x == 0) bar(1); else if (__builtin_expect(x == 10, 1)) bar(2); else if (x == 20) bar(3); } CFG: B0 | \ | X0 B10 | \ | X10 B20 | \ E X20 Merge B0-B10: w(B0-X0) = w(B0-X0)*sum-weights(B10) = w(B0-X0) * (w(B10-X10) + w(B10-B20)) w(B0-X10) = w(B0-B10) * w(B10-X10) w(B0-B20) = w(B0-B10) * w(B10-B20) B0 __ | \ \ | X10 X0 B20 | \ E X20 Merge B0-B20: w(B0-X0) = w(B0-X0) * sum-weights(B20) = w(B0-X0) * (w(B20-E) + w(B20-X20)) w(B0-X10) = w(B0-X10) * sum-weights(B20) = ... w(B0-X20) = w(B0-B20) * w(B20-X20) w(B0-E) = w(B0-B20) * w(B20-E) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162868 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 154 +++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 06a61a8..dddc18f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -615,6 +615,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -738,6 +741,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Tries to get a branch weight for the given instruction, returns NULL if it +/// can't. 
Pos starts at 0. +static ConstantInt* GetWeight(Instruction* I, int Pos) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) { + if (MDString* MDS = dyn_cast(ProfMD->getOperand(0))) { + if (MDS->getString().equals("branch_weights")) { + assert(ProfMD->getNumOperands() >= 3); + return dyn_cast(ProfMD->getOperand(1 + Pos)); + } + } + } + + return 0; +} + +/// Scale the given weights based on the new TI's metadata. Scaling is done by +/// multiplying every weight by the sum of the successor's weights. +static void ScaleWeights(Instruction* STI, MutableArrayRef Weights) { + // Sum the successor's weights + assert(HasBranchWeights(STI)); + unsigned Scale = 0; + MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); + for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { + ConstantInt* CI = dyn_cast(ProfMD->getOperand(i)); + assert(CI); + Scale += CI->getValue().getZExtValue(); + } + + // Skip default, as it's replaced during the folding + for (unsigned i = 1; i < Weights.size(); ++i) { + Weights[i] *= Scale; + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +834,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. 
SmallVector NewSuccessors; + // Update the branch weight metadata along the way + SmallVector Weights; + uint64_t PredDefaultWeight = 0; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If the predecessor is a conditional eq, then swap the default weight + // to be the first entry. + if (BranchInst* BI = dyn_cast(PTI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast(BI->getCondition()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + std::swap(Weights.front(), Weights.back()); + } + } + + PredDefaultWeight = Weights.front(); + } else if (SuccHasWeights) { + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + PredDefaultWeight = 1; + } + + uint64_t SuccDefaultWeight = 0; + if (SuccHasWeights) { + int Index = 0; + if (BranchInst* BI = dyn_cast(TI)) { + ICmpInst* ICI = dyn_cast(BI->getCondition()); + assert(ICI); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Index = 1; + } + + SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); + } + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +893,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. 
std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights) { + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,14 +909,35 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + if (SuccHasWeights) { + ScaleWeights(TI, Weights); + Weights.front() *= SuccDefaultWeight; + } else if (PredHasWeights) { + Weights.front() /= (1 + BBCases.size()); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights) { + Weights.push_back(PredDefaultWeight * + GetWeight(TI, i)->getValue().getZExtValue()); + } else if (PredHasWeights) { + // Split the old default's weight amongst the children + assert(PredDefaultWeight != 0); + Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + } } } else { + // FIXME: preserve branch weight metadata, similarly to the 'then' + // above. For now, drop it. + PredHasWeights = false; + SuccHasWeights = false; + // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. @@ -851,6 +991,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. 
If BB is still a successor of PSI, then we must @@ -2349,6 +2500,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // transformation. A switch with one value is just an cond branch. if (ExtraCase && Values.size() < 2) return false; + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); -- cgit v1.1 From 749807852bd927dd225a360e9a388e0cc2792036 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Thu, 30 Aug 2012 15:45:16 +0000 Subject: test git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162914 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index dddc18f..04cc11d 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -766,8 +766,8 @@ static ConstantInt* GetWeight(Instruction* I, int Pos) { return 0; } -/// Scale the given weights based on the new TI's metadata. Scaling is done by -/// multiplying every weight by the sum of the successor's weights. +/// Scale the given weights based on the successor TI's metadata. Scaling is +/// done by multiplying every weight by the sum of the successor's weights. 
static void ScaleWeights(Instruction* STI, MutableArrayRef Weights) { // Sum the successor's weights assert(HasBranchWeights(STI)); -- cgit v1.1 From 2e2efd960056bbb7e4bbd843c8de55116d52aa7d Mon Sep 17 00:00:00 2001 From: Preston Gurd Date: Tue, 4 Sep 2012 18:22:17 +0000 Subject: Generic Bypass Slow Div - CodeGenPrepare pass for identifying div/rem ops - Backend specifies the type mapping using addBypassSlowDivType - Enabled only for Intel Atom with O2 32-bit -> 8-bit - Replace IDIV with instructions which test its value and use DIVB if the value is positive and less than 256. - In the case when the quotient and remainder of a divide are used a DIV and a REM instruction will be present in the IR. In the non-Atom case they are both lowered to IDIVs and CSE removes the redundant IDIV instruction, using the quotient and remainder from the first IDIV. However, due to this optimization CSE is not able to eliminate redundant IDIV instructions because they are located in different basic blocks. This is overcome by calculating both the quotient (DIV) and remainder (REM) in each basic block that is inserted by the optimization and reusing the result values when a subsequent DIV or REM instruction uses the same operands. - Test cases check for the presence of the optimization when calculating either the quotient, remainder, or both. Patch by Tyler Nowicki! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163150 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 251 ++++++++++++++++++++++++++++ lib/Transforms/Utils/CMakeLists.txt | 1 + 2 files changed, 252 insertions(+) create mode 100644 lib/Transforms/Utils/BypassSlowDivision.cpp (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000..1c58bec --- /dev/null +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,251 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" + +using namespace llvm; + +namespace llvm { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; + + template<> + struct DenseMapInfo { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast(Val.Dividend) ^ + reinterpret_cast(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. 
+static void insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) +{ + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (dyn_cast(Divisor) != 0 || + (dyn_cast(Dividend) != 0 && + dyn_cast(Divisor) != 0)) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + I++; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = 
FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) { + Instr->replaceAllUsesWith(QuoPhi); + } else { + Instr->replaceAllUsesWith(RemPhi); + } + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = MainBuilder.getInt32(0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair(Key, Value)); +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if +// operands and operation are identical. 
Otherwise call insertFastDiv to perform +// the optimization and cache the resulting dividend and remainder. +static void reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) +{ + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); + return; + } + + // Replace operation value with previously generated phi node + DivPhiNodes Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + J++; + + // Remove redundant operation + Instr->eraseFromParent(); +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. 
+bool bypassSlowDivision(Function &F, + Function::iterator &I, + const llvm::DenseMap &BypassTypeMap) +{ + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) { + continue; + } + // Continue if div/rem type is not bypassed + DenseMap::const_iterator BT = BypassTypeMap.find(J->getType()); + if (BT == BypassTypeMap.end()) { + continue; + } + + IntegerType *BypassType = (IntegerType *)BT->second; + reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, DivCache); + MadeChange = true; + } + + return MadeChange; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 4ff31ca..215a16f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp + BypassSlowDivision.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp -- cgit v1.1 From 7b2d20d0600ffbc9ae6df67a18b6f6485ebceb54 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 4 Sep 2012 20:48:24 +0000 Subject: Return false if BypassSlowDivision doesn't change anything. 
Also a few minor changes: - use pre-inc instead of post-inc - use isa instead of dyn_cast - 80 col - trailing spaces git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 67 +++++++++++++++-------------- 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 1c58bec..af0633c 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -61,7 +61,7 @@ namespace llvm { static unsigned getHashValue(const DivOpInfo &Val) { return (unsigned)(reinterpret_cast(Val.Dividend) ^ reinterpret_cast(Val.Divisor)) ^ - (unsigned)Val.SignedOp; + (unsigned)Val.SignedOp; } }; @@ -71,31 +71,29 @@ namespace llvm { // insertFastDiv - Substitutes the div/rem instruction with code that checks the // value of the operands and uses a shorter-faster div/rem instruction when // possible and the longer-slower div/rem instruction otherwise. -static void insertFastDiv(Function &F, +static bool insertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) -{ + DivCacheTy &PerBBDivCache) { // Get instruction operands Instruction *Instr = J; Value *Dividend = Instr->getOperand(0); Value *Divisor = Instr->getOperand(1); - if (dyn_cast(Divisor) != 0 || - (dyn_cast(Dividend) != 0 && - dyn_cast(Divisor) != 0)) { + if (isa(Divisor) || + (isa(Dividend) && isa(Divisor))) { // Operations with immediate values should have // been solved and replaced during compile time. 
- return; + return false; } // Basic Block is split before divide BasicBlock *MainBB = I; BasicBlock *SuccessorBB = I->splitBasicBlock(J); - I++; //advance iterator I to successorBB + ++I; //advance iterator I to successorBB // Add new basic block for slow divide operation BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", @@ -118,17 +116,19 @@ static void insertFastDiv(Function &F, MainBB->getParent(), SuccessorBB); FastBB->moveBefore(SlowBB); IRBuilder<> FastBuilder(FastBB, FastBB->begin()); - Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, BypassType); - Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, BypassType); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); // udiv/urem because optimization only handles positive numbers Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, - ShortDivisorV); + ShortDivisorV); Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, ShortDivisorV); Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, - ShortQuotientV, - Dividend->getType()); + ShortQuotientV, + Dividend->getType()); Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, ShortRemainderV, Dividend->getType()); @@ -144,11 +144,10 @@ static void insertFastDiv(Function &F, RemPhi->addIncoming(FastRemainderV, FastBB); // Replace Instr with appropriate phi node - if (UseDivOp) { + if (UseDivOp) Instr->replaceAllUsesWith(QuoPhi); - } else { + else Instr->replaceAllUsesWith(RemPhi); - } Instr->eraseFromParent(); // Combine operands into a single value with OR for value testing below @@ -174,19 +173,19 @@ static void insertFastDiv(Function &F, DivOpInfo Key(UseSignedOp, Dividend, Divisor); DivPhiNodes Value(QuoPhi, RemPhi); PerBBDivCache.insert(std::pair(Key, Value)); + return true; } // reuseOrInsertFastDiv - Reuses previously 
computed dividend or remainder if // operands and operation are identical. Otherwise call insertFastDiv to perform // the optimization and cache the resulting dividend and remainder. -static void reuseOrInsertFastDiv(Function &F, +static bool reuseOrInsertFastDiv(Function &F, Function::iterator &I, BasicBlock::iterator &J, IntegerType *BypassType, bool UseDivOp, bool UseSignedOp, - DivCacheTy &PerBBDivCache) -{ + DivCacheTy &PerBBDivCache) { // Get instruction operands Instruction *Instr = J; DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); @@ -194,8 +193,8 @@ static void reuseOrInsertFastDiv(Function &F, if (CacheI == PerBBDivCache.end()) { // If previous instance does not exist, insert fast div - insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, PerBBDivCache); - return; + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); } // Replace operation value with previously generated phi node @@ -209,18 +208,18 @@ static void reuseOrInsertFastDiv(Function &F, } // Advance to next operation - J++; + ++J; // Remove redundant operation Instr->eraseFromParent(); + return true; } // bypassSlowDivision - This optimization identifies DIV instructions that can // be profitably bypassed and carried out with a shorter, faster divide. 
bool bypassSlowDivision(Function &F, Function::iterator &I, - const llvm::DenseMap &BypassTypeMap) -{ + const llvm::DenseMap &BypassTypeMap) { DivCacheTy DivCache; bool MadeChange = false; @@ -230,20 +229,22 @@ bool bypassSlowDivision(Function &F, unsigned Opcode = J->getOpcode(); bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; - bool UseSignedOp = Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; // Only optimize div or rem ops - if (!UseDivOp && !UseRemOp) { + if (!UseDivOp && !UseRemOp) continue; - } + // Continue if div/rem type is not bypassed - DenseMap::const_iterator BT = BypassTypeMap.find(J->getType()); - if (BT == BypassTypeMap.end()) { + DenseMap::const_iterator BT = + BypassTypeMap.find(J->getType()); + if (BT == BypassTypeMap.end()) continue; - } - IntegerType *BypassType = (IntegerType *)BT->second; - reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, DivCache); + IntegerType *BypassType = cast(BT->second); + MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, + UseSignedOp, DivCache); MadeChange = true; } -- cgit v1.1 From ed0e3a31e1dd201d87288c2e73fc74484d2e8c4d Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 4 Sep 2012 21:16:59 +0000 Subject: Fix my previous patch (r163164). It does now what it is supposed to do: Doesn't set MadeChange to TRUE if BypassSlowDivision doesn't change anything. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163165 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index af0633c..4130def 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -245,7 +245,6 @@ bool bypassSlowDivision(Function &F, IntegerType *BypassType = cast(BT->second); MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, DivCache); - MadeChange = true; } return MadeChange; -- cgit v1.1 From be11991208f175892666887bc59fd9d32ee3e6a4 Mon Sep 17 00:00:00 2001 From: Jakub Staszak Date: Tue, 4 Sep 2012 23:11:11 +0000 Subject: BypassSlowDivision: Assign to reference, don't copy the object. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 4130def..b694779 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -189,7 +189,7 @@ static bool reuseOrInsertFastDiv(Function &F, // Get instruction operands Instruction *Instr = J; DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); - DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); if (CacheI == PerBBDivCache.end()) { // If previous instance does not exist, insert fast div @@ -198,7 +198,7 @@ static bool reuseOrInsertFastDiv(Function &F, } // Replace operation value with previously generated phi node - DivPhiNodes Value = CacheI->second; + DivPhiNodes &Value = CacheI->second; if (UseDivOp) 
{ // Replace all uses of div instruction with quotient phi node J->replaceAllUsesWith(Value.Quotient); -- cgit v1.1 From 59324297650c12a8dccf1a7ad650a9e895fdc17e Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Wed, 5 Sep 2012 22:26:57 +0000 Subject: Stop casting away const qualifier needlessly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 04cc11d..6cd3bbc 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -732,8 +732,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) -- cgit v1.1 From 486270aee6ffd2a0c3c2333a8a0091c29f037aae Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 6 Sep 2012 09:43:28 +0000 Subject: Build lookup tables for switches (PR884) This adds a transformation to SimplifyCFG that attempts to turn switch instructions into loads from lookup tables. It works on switches that are only used to initialize one or more phi nodes in a common successor basic block, for example: int f(int x) { switch (x) { case 0: return 5; case 1: return 4; case 2: return -2; case 5: return 7; case 6: return 9; default: return 42; } This speeds up the code by removing the hard-to-predict jump, and reduces code size by removing the code for the jump targets. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163302 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 286 +++++++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 6cd3bbc..62b98cb 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -2977,6 +2979,287 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa(C) || + isa(C) || + isa(C) || + isa(C) || + isa(C); +} + +/// GetCaseResulsts - Try to determine the resulting constant values in phi +/// nodes at the common destination basic block for one of the case +/// destinations of a switch instruction. +static bool GetCaseResults(SwitchInst *SI, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty, continue to its successor. 
+ if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && + !isa(CaseDest->begin())) { + + TerminatorInst *Terminator = CaseDest->getTerminator(); + if (Terminator->getNumSuccessors() != 1) + return false; + + Pred = CaseDest; + CaseDest = Terminator->getSuccessor(0); + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = dyn_cast(PHI->getIncomingValue(Idx)); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +/// BuildLookupTable - Build a lookup table with the contents of Results, using +/// DefaultResult to fill the holes in the table. If the table ends up +/// containing the same result in each element, set *SingleResult to that value +/// and return NULL. +static GlobalVariable *BuildLookupTable( + Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const std::vector >& Results, + Constant *DefaultResult, + Constant **SingleResult) { + assert(Results.size() && "Need values to build lookup table"); + assert(TableSize >= Results.size() && "Table needs to hold all values"); + + // If all values in the table are equal, this is that value. + Constant *SameResult = Results.begin()->second; + + // Build up the table contents. 
+ std::vector TableContents(TableSize); + for (size_t I = 0, E = Results.size(); I != E; ++I) { + ConstantInt *CaseVal = Results[I].first; + Constant *CaseRes = Results[I].second; + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SameResult) + SameResult = NULL; + } + + // Fill in any holes in the table with the default result. + if (Results.size() < TableSize) { + for (unsigned i = 0; i < TableSize; ++i) { + if (!TableContents[i]) + TableContents[i] = DefaultResult; + } + + if (DefaultResult != SameResult) + SameResult = NULL; + } + + // Same result was used in the entire table; just return that. + if (SameResult) { + *SingleResult = SameResult; + return NULL; + } + + ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + GV->setUnnamedAddr(true); + return GV; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + // FIXME: Handle unreachable cases. + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: If the results are all integers and the lookup table would fit in a + // target-legal register, we should store them as a bitmap and use shift/mask + // to look up the result. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. 
+ + // Ignore the switch if the number of cases are too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = NULL; + typedef std::vector > ResultListTy; + SmallDenseMap ResultLists; + SmallDenseMap DefaultResults; + SmallDenseMap ResultTypes; + SmallVector PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + return false; + + // Append the result from this case to the list for each phi. + for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. 
+ { + SmallVector, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + // The table density should be at lest 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + // Be careful to avoid overlow in the density computation. + if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) + return false; + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (SI->getNumCases() * 10 < TableSize * 4) + return false; + + // Build the lookup tables. + SmallDenseMap LookupTables; + SmallDenseMap SingleResults; + + Module &Mod = *CommonDest->getParent()->getParent(); + for (SmallDenseMap::iterator I = ResultLists.begin(), + E = ResultLists.end(); I != E; ++I) { + PHINode *PHI = I->first; + + Constant *SingleResult = NULL; + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, I->second, + DefaultResults[PHI], &SingleResult); + SingleResults[PHI] = SingleResult; + } + + // Create the BB that does the lookups. + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. 
+ Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (SmallVector::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + // There was a single result for this phi; just use that. + if (Constant *SingleResult = SingleResults[PHI]) { + PHI->addIncoming(SingleResult, LookupBB); + continue; + } + + Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; + Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, + "switch.gep"); + Value *Result = Builder.CreateLoad(GEP, "switch.load"); + + // If the result is only going to be used to return from the function, + // we want to do that right here. + if (PHI->hasOneUse() && isa(*PHI->use_begin())) { + if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + } + } + + if (!ReturnedEarly) + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -3016,6 +3299,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } -- cgit v1.1 From 3bd51b8df3212f765e6ffee06e32b9a670f9b16c Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 6 Sep 2012 10:10:35 +0000 Subject: Fix switch_to_lookup_table.ll test from r163302. The lookup tables did not get built in a deterministic order. 
This makes them get built in the order that the corresponding phi nodes were found. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163305 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 62b98cb..d757c05 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3190,13 +3190,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, SmallDenseMap SingleResults; Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallDenseMap::iterator I = ResultLists.begin(), - E = ResultLists.end(); I != E; ++I) { - PHINode *PHI = I->first; + for (SmallVector::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, I->second, - DefaultResults[PHI], &SingleResult); + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, + ResultLists[PHI], DefaultResults[PHI], + &SingleResult); SingleResults[PHI] = SingleResult; } -- cgit v1.1 From cc77eece74c8db09acc2af425e7e6c88a5bb30d1 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Thu, 6 Sep 2012 19:55:56 +0000 Subject: Release build: guard dump functions with "ifndef NDEBUG" No functional change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163344 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/AddrModeMatcher.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d831452..1e6586b 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#ifndef NDEBUG void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. -- cgit v1.1 From bf01582165a9cf8e95a21a284930a82c3fc3bda5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 7 Sep 2012 08:22:57 +0000 Subject: SimplifyCFG: ValidLookupTableConstant should be static git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d757c05..3df3099 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2981,7 +2981,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { /// ValidLookupTableConstant - Return true if the backend will be able to handle /// initializing an array of constants like C. -bool ValidLookupTableConstant(Constant *C) { +static bool ValidLookupTableConstant(Constant *C) { if (ConstantExpr *CE = dyn_cast(C)) return CE->isGEPWithNoNotionalOverIndexing(); -- cgit v1.1 From a34434184915cf869e2daf26a9d15483b7981aaa Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Sat, 8 Sep 2012 00:07:26 +0000 Subject: Remove an incorrect assert during branch weight propagation. Patch and test case by Alastair Murray! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 3df3099..db8edea 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -929,7 +929,6 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, GetWeight(TI, i)->getValue().getZExtValue()); } else if (PredHasWeights) { // Split the old default's weight amongst the children - assert(PredDefaultWeight != 0); Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); } } -- cgit v1.1 From 2f9fc761d22fb62d63d5f74a826890d2ec7c72f2 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Mon, 10 Sep 2012 07:44:22 +0000 Subject: Fix style issues from r163302 pointed out by Evan. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163491 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SimplifyCFG.cpp | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index db8edea..32d7fa1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3045,13 +3045,12 @@ static bool GetCaseResults(SwitchInst *SI, /// DefaultResult to fill the holes in the table. If the table ends up /// containing the same result in each element, set *SingleResult to that value /// and return NULL. 
-static GlobalVariable *BuildLookupTable( - Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const std::vector >& Results, - Constant *DefaultResult, - Constant **SingleResult) { +static GlobalVariable *BuildLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector, 4>& Results, + Constant *DefaultResult, + Constant **SingleResult) { assert(Results.size() && "Need values to build lookup table"); assert(TableSize >= Results.size() && "Table needs to hold all values"); @@ -3133,7 +3132,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MaxCaseVal = CI.getCaseValue(); BasicBlock *CommonDest = NULL; - typedef std::vector > ResultListTy; + typedef SmallVector, 4> ResultListTy; SmallDenseMap ResultLists; SmallDenseMap DefaultResults; SmallDenseMap ResultTypes; @@ -3161,16 +3160,14 @@ static bool SwitchToLookupTable(SwitchInst *SI, } // Get the resulting values for the default case. - { - SmallVector, 4> DefaultResultsList; - if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) - return false; - for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { - PHINode *PHI = DefaultResultsList[I].first; - Constant *Result = DefaultResultsList[I].second; - DefaultResults[PHI] = Result; - ResultTypes[PHI] = Result->getType(); - } + SmallVector, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); -- cgit v1.1 From 04142bc845c513141046e852db86670505459416 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 10 Sep 2012 11:52:08 +0000 Subject: Move bypassSlowDivision into the llvm namespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib/Transforms/Utils') diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index b694779..30d60be 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -24,7 +24,7 @@ using namespace llvm; -namespace llvm { +namespace { struct DivOpInfo { bool SignedOp; Value *Dividend; @@ -41,7 +41,9 @@ namespace llvm { DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) : Quotient(InQuotient), Remainder(InRemainder) {} }; +} +namespace llvm { template<> struct DenseMapInfo { static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { @@ -217,9 +219,9 @@ static bool reuseOrInsertFastDiv(Function &F, // bypassSlowDivision - This optimization identifies DIV instructions that can // be profitably bypassed and carried out with a shorter, faster divide. -bool bypassSlowDivision(Function &F, - Function::iterator &I, - const llvm::DenseMap &BypassTypeMap) { +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap &BypassTypeMap) { DivCacheTy DivCache; bool MadeChange = false; -- cgit v1.1