Diffstat (limited to 'lib/Transforms')
53 files changed, 1185 insertions, 485 deletions
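The dominant change in this patch is mechanical: TargetLibraryInfo is threaded into the constant-folding and instruction-simplification entry points (ConstantFoldInstruction, ConstantFoldConstantExpression, ConstantFoldCall, SimplifyInstruction, and friends), so that knowledge about library calls comes from TargetLibraryInfo rather than hard-coded names (the sqrt/sqrtf hunk in InstCombineCasts below is the clearest example). Each affected pass follows the same three-step shape: declare the analysis in getAnalysisUsage, switch registration to the INITIALIZE_PASS_BEGIN/DEPENDENCY/END macros, and fetch the analysis at run time. A minimal self-contained sketch of that shape (the pass itself is hypothetical; only the API usage mirrors the hunks below):

// Hypothetical pass showing the pattern this patch applies to GlobalOpt,
// InstCombine, ConstantProp, EarlyCSE, GVN, JumpThreading, LICM, etc.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
using namespace llvm;

namespace {
struct FoldSketch : public FunctionPass {
  static char ID;
  FoldSketch() : FunctionPass(ID) {}

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfo>();   // the new, hard dependency
  }

  virtual bool runOnFunction(Function &F) {
    // TargetData stays optional; TargetLibraryInfo is always present.
    TargetData *TD = getAnalysisIfAvailable<TargetData>();
    TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
    bool Changed = false;
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E;) {
      Instruction *Inst = &*I++;
      if (!Inst->use_empty())
        if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
          Inst->replaceAllUsesWith(C);
          Inst->eraseFromParent();
          Changed = true;
        }
    }
    return Changed;
  }
};
}

char FoldSketch::ID = 0;
// Standalone registration for the sketch; the in-tree passes instead use the
// INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END
// macros, as the hunks below show.
static RegisterPass<FoldSketch> X("fold-sketch", "TLI-aware folding sketch");

Note the asymmetry the hunks preserve throughout: TargetData remains optional (getAnalysisIfAvailable), while TargetLibraryInfo becomes a hard requirement (addRequired plus getAnalysis).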
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 8fa66fc..58b3551 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,14 +20,3 @@ add_llvm_library(LLVMipo StripDeadPrototypes.cpp StripSymbols.cpp ) - -add_llvm_library_dependencies(LLVMipo - LLVMAnalysis - LLVMCore - LLVMInstCombine - LLVMScalarOpts - LLVMSupport - LLVMTarget - LLVMTransformUtils - LLVMipa - ) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index c57e9fc..2e869e6 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -61,6 +62,7 @@ namespace { struct GlobalStatus; struct GlobalOpt : public ModulePass { virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetLibraryInfo>(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -84,7 +86,10 @@ namespace { } char GlobalOpt::ID = 0; -INITIALIZE_PASS(GlobalOpt, "globalopt", +INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", + "Global Variable Optimizer", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } @@ -345,6 +350,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { // and will invalidate our notion of what Init is. Constant *SubInit = 0; if (!isa<ConstantExpr>(GEP->getOperand(0))) { + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr) @@ -828,6 +834,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { static void ConstantPropUsersOf(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) if (Instruction *I = dyn_cast<Instruction>(*UI++)) + // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding. if (Constant *NewC = ConstantFoldInstruction(I)) { I->replaceAllUsesWith(NewC); @@ -1931,7 +1938,8 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { if (GV->hasInitializer()) if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) { TargetData *TD = getAnalysisIfAvailable<TargetData>(); - Constant *New = ConstantFoldConstantExpression(CE, TD); + TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + Constant *New = ConstantFoldConstantExpression(CE, TD, TLI); if (New && New != CE) GV->setInitializer(New); } @@ -2304,7 +2312,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, DenseMap<Constant*, Constant*> &MutatedMemory, std::vector<GlobalVariable*> &AllocaTmps, SmallPtrSet<Constant*, 8> &SimpleConstants, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Check to see if this function is already executing (recursion). If so, // bail out. TODO: we might want to accept limited recursion. 
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end()) @@ -2461,7 +2470,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. - if (Constant *C = ConstantFoldCall(Callee, Formals)) { + if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; } else { return false; @@ -2473,7 +2482,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, Constant *RetVal; // Execute the call, if successful, use the return value. if (!EvaluateFunction(Callee, RetVal, Formals, CallStack, - MutatedMemory, AllocaTmps, SimpleConstants, TD)) + MutatedMemory, AllocaTmps, SimpleConstants, TD, + TLI)) return false; InstResult = RetVal; } @@ -2535,7 +2545,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) - InstResult = ConstantFoldConstantExpression(CE, TD); + InstResult = ConstantFoldConstantExpression(CE, TD, TLI); Values[CurInst] = InstResult; } @@ -2547,7 +2557,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, /// EvaluateStaticConstructor - Evaluate static constructors in the function, if /// we can. Return true if we can, false otherwise. -static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { +static bool EvaluateStaticConstructor(Function *F, const TargetData *TD, + const TargetLibraryInfo *TLI) { /// MutatedMemory - For each store we execute, we update this map. Loads /// check this to get the most up-to-date value. If evaluation is successful, /// this state is committed to the process. @@ -2572,7 +2583,7 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { bool EvalSuccess = EvaluateFunction(F, RetValDummy, SmallVector<Constant*, 0>(), CallStack, MutatedMemory, AllocaTmps, - SimpleConstants, TD); + SimpleConstants, TD, TLI); if (EvalSuccess) { // We succeeded at evaluation: commit the result. @@ -2601,8 +2612,6 @@ static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) { return EvalSuccess; } - - /// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. /// Return true if anything changed. bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { @@ -2611,6 +2620,8 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (Ctors.empty()) return false; const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); + // Loop over global ctors, optimizing them when we can. for (unsigned i = 0; i != Ctors.size(); ++i) { Function *F = Ctors[i]; @@ -2628,7 +2639,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { if (F->empty()) continue; // If we can evaluate the ctor at compile time, do. 
- if (EvaluateStaticConstructor(F, TD)) { + if (EvaluateStaticConstructor(F, TD, TLI)) { Ctors.erase(Ctors.begin()+i); MadeChange = true; --i; diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 884faca..b358fab 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -21,4 +21,3 @@ name = IPO parent = Transforms library_name = ipo required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils - diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 8fdfd72..f63f532 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -21,7 +21,6 @@ #include "llvm/DefaultPasses.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/IPO.h" @@ -101,6 +100,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(Inliner); Inliner = 0; } + addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); return; } @@ -340,4 +340,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, PassManagerBase *LPM = unwrap(PM); Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); } - diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt index a46d5ad..d070ccc 100644 --- a/lib/Transforms/InstCombine/CMakeLists.txt +++ b/lib/Transforms/InstCombine/CMakeLists.txt @@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine InstCombineSimplifyDemanded.cpp InstCombineVectorOps.cpp ) - -add_llvm_library_dependencies(LLVMInstCombine - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 3808278..464e9d0 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -22,6 +22,7 @@ namespace llvm { class CallSite; class TargetData; + class TargetLibraryInfo; class DbgDeclareInst; class MemIntrinsic; class MemSetInst; @@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor<InstCombiner, Instruction*> { TargetData *TD; + TargetLibraryInfo *TLI; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -92,9 +94,11 @@ public: bool DoOneIteration(Function &F, unsigned ItNum); virtual void getAnalysisUsage(AnalysisUsage &AU) const; - + TargetData *getTargetData() const { return TD; } + TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } + // Visitation implementation - Implement instruction combining for different // instruction types. The semantics are as follows: // Return Value: diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e8136ab..27c7c54 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Get the current byte offset into the thing. Use the original // operand in case we're looking through a bitcast. 
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end()); + if (!GEP->getPointerOperandType()->isPointerTy()) + return 0; Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops); Op1 = GEP->getPointerOperand()->stripPointerCasts(); @@ -960,7 +962,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 2 : 0); + int ix = FTy->getNumParams(); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f10e48a..46e4acd 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } - - /// EvaluateInDifferentType - Given an expression that /// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually /// insert the code to evaluate the expression. @@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/); // If we got a constantexpr back, try to simplify it with TD info. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - C = ConstantFoldConstantExpression(CE, TD); + C = ConstantFoldConstantExpression(CE, TD, TLI); return C; } @@ -528,9 +527,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return ReplaceInstUsesWith(CI, In); } - - - + // zext (X == 0) to i32 --> X^1 iff X has only the low bit set. // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. // zext (X == 1) to i32 --> X iff X has only the low bit set. @@ -1213,10 +1210,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) - // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it. CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); - if (Call && Call->getCalledFunction() && - Call->getCalledFunction()->getName() == "sqrt" && + if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && + Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) && Call->getNumArgOperands() == 1 && Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bb1cbfa..144b92b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -284,7 +284,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // Find out if the comparison would be true or false for the i'th element. Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt, - CompareRHS, TD); + CompareRHS, TD, TLI); // If the result is undef for this element, ignore it. 
if (isa<UndefValue>(C)) { // Extend range state machines to cover this element in case there is an @@ -1657,6 +1657,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) return 0; + // This is only really a signed overflow check if the inputs have been + // sign-extended; check for that condition. For example, if CI2 is 2^31 and + // the operands of the add are 64 bits wide, we need at least 33 sign bits. + unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; + if (IC.ComputeNumSignBits(A) < NeededSignBits || + IC.ComputeNumSignBits(B) < NeededSignBits) + return 0; + // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant // and truncates that discard the high bits of the add. Verify that this is @@ -1787,6 +1795,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); + // comparing -val or val with non-zero is the same as just comparing val + // ie, abs(val) != 0 -> val != 0 + if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) + { + Value *Cond, *SelectTrue, *SelectFalse; + if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue), + m_Value(SelectFalse)))) { + if (Value *V = dyn_castNegVal(SelectTrue)) { + if (V == SelectFalse) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + else if (Value *V = dyn_castNegVal(SelectFalse)) { + if (V == SelectTrue) + return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1); + } + } + } + Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 91e60a4..f1ea8ea 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -282,7 +282,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is /// replaced with RepOp. static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { // Trivial replacement. if (V == Op) return RepOp; @@ -294,17 +295,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, // If this is a binary operator, try to simplify it with the replaced op. if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) { if (B->getOperand(0) == Op) - return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD); + return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI); if (B->getOperand(1) == Op) - return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD); + return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI); } // Same for CmpInsts. if (CmpInst *C = dyn_cast<CmpInst>(I)) { if (C->getOperand(0) == Op) - return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD); + return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD, + TLI); if (C->getOperand(1) == Op) - return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD); + return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD, + TLI); } // TODO: We could hand off more cases to instsimplify here. 
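The ComputeNumSignBits guard added to ProcessUGT_ADDCST_ADD above makes the comment's 2^31 example concrete: narrowing the wide overflow pattern to llvm.sadd.with.overflow.i32 is only sound when both add operands are effectively sign-extended from 32 bits. A standalone rendition of that example (the pattern shape is paraphrased from the surrounding code, not copied from it):

#include <cstdint>
#include <cstdio>

// Wide pattern, roughly: does (a + b + 2^31) exceed 2^32 - 1 as unsigned,
// i.e. is the 64-bit sum outside [-2^31, 2^31)?  The transform wants to
// replace this with llvm.sadd.with.overflow.i32 on the truncated inputs.
static bool widePattern(int64_t a, int64_t b) {
  return (uint64_t)(a + b + 0x80000000LL) > 0xFFFFFFFFULL;
}

// What sadd.with.overflow.i32 would report on the truncated operands.
static bool narrowOverflow(int64_t a, int64_t b) {
  int32_t ta = (int32_t)a, tb = (int32_t)b;
  int64_t wide = (int64_t)ta + (int64_t)tb;
  return wide != (int32_t)wide;           // signed 32-bit overflow?
}

int main() {
  // Operands with >= 33 sign bits (genuine sign-extensions): both agree.
  printf("%d %d\n", widePattern(0x7FFFFFFF, 1), narrowOverflow(0x7FFFFFFF, 1));
  // An operand with only 32 sign bits: the narrow check disagrees, which is
  // exactly the miscompile the new guard rules out.
  int64_t a = 0x100000000LL;              // truncates to 0
  printf("%d %d\n", widePattern(a, 0), narrowOverflow(a, 0));
  return 0;
}

With NeededSignBits = 64 - 32 + 1 = 33, the second case is rejected before the transform fires.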
@@ -330,7 +333,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, return ConstantFoldLoadFromConstPtr(ConstOps[0], TD); return ConstantFoldInstOperands(I->getOpcode(), I->getType(), - ConstOps, TD); + ConstOps, TD, TLI); } } @@ -479,18 +482,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, // arms of the select. See if substituting this value into the arm and // simplifying the result yields the same value as the other arm. if (Pred == ICmpInst::ICMP_EQ) { - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, FalseVal); - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, FalseVal); } else if (Pred == ICmpInst::ICMP_NE) { - if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal || - SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal) + if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal || + SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal) return ReplaceInstUsesWith(SI, TrueVal); - if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal || - SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal) + if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal || + SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal) return ReplaceInstUsesWith(SI, TrueVal); } @@ -679,6 +682,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return BinaryOperator::CreateOr(CondVal, FalseVal); else if (CondVal == FalseVal) return BinaryOperator::CreateAnd(CondVal, TrueVal); + + // select a, ~a, b -> (~a)&b + // select a, b, ~a -> (~a)|b + if (match(TrueVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateAnd(TrueVal, FalseVal); + else if (match(FalseVal, m_Not(m_Specific(CondVal)))) + return BinaryOperator::CreateOr(TrueVal, FalseVal); } // Selecting between two integer constants? diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 6d85add..702e0f2 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, V = IC.Builder->CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. 
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - V = ConstantFoldConstantExpression(CE, IC.getTargetData()); + V = ConstantFoldConstantExpression(CE, IC.getTargetData(), + IC.getTargetLibraryInfo()); return V; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index a7a6311..af065cd 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) { } char InstCombiner::ID = 0; -INITIALIZE_PASS(InstCombiner, "instcombine", +INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine", + "Combine redundant instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } @@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { MadeChange = true; } - if ((*I)->getType() != IntPtrTy) { + Type *IndexTy = (*I)->getType(); + if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. @@ -909,7 +915,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType()); + PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); + // We do not handle pointer-vector geps here + if (!StrippedPtr) + return 0; + if (StrippedPtr != PtrOp && StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { @@ -1798,7 +1808,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { static bool AddReachableCodeToWorklist(BasicBlock *BB, SmallPtrSet<BasicBlock*, 64> &Visited, InstCombiner &IC, - const TargetData *TD) { + const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeIRChange = false; SmallVector<BasicBlock*, 256> Worklist; Worklist.push_back(BB); @@ -1825,7 +1836,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, // ConstantProp instruction if trivially constant. if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(Inst, TD)) { + if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *Inst << '\n'); Inst->replaceAllUsesWith(C); @@ -1843,7 +1854,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, Constant*& FoldRes = FoldedConstants[CE]; if (!FoldRes) - FoldRes = ConstantFoldConstantExpression(CE, TD); + FoldRes = ConstantFoldConstantExpression(CE, TD, TLI); if (!FoldRes) FoldRes = CE; @@ -1909,7 +1920,8 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // the reachable instructions. Ignore blocks that are not reachable. Keep // track of which blocks we visit. 
SmallPtrSet<BasicBlock*, 64> Visited; - MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD); + MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD, + TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents @@ -1954,7 +1966,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { // Instruction isn't dead, see if we can constant propagate it. if (!I->use_empty() && isa<Constant>(I->getOperand(0))) - if (Constant *C = ConstantFoldInstruction(I, TD)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n'); // Add operands to the worklist. @@ -2062,7 +2074,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); - + TLI = &getAnalysis<TargetLibraryInfo>(); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt index b73c303..62c6161 100644 --- a/lib/Transforms/InstCombine/LLVMBuild.txt +++ b/lib/Transforms/InstCombine/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = InstCombine parent = Transforms required_libraries = Analysis Core Support Target TransformUtils - diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b617539..4cc5727 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -55,8 +55,11 @@ static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *kAsanModuleCtorName = "asan.module_ctor"; +static const char *kAsanModuleDtorName = "asan.module_dtor"; +static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; +static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -434,6 +437,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, IRBuilder<> IRB1(CheckTerm); Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize); Crash->setDebugLoc(OrigIns->getDebugLoc()); + ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C)); } // This function replaces all global variables with new variables that have @@ -517,7 +521,11 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); + SmallString<2048> DescriptionOfGlobal = G->getName(); + DescriptionOfGlobal += " ("; + DescriptionOfGlobal += M.getModuleIdentifier(); + DescriptionOfGlobal += ")"; + GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); // Create a new global variable with enough space for a redzone. 
GlobalVariable *NewGlobal = new GlobalVariable( @@ -558,6 +566,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); + // We also need to unregister globals at the end, e.g. when a shared library + // gets closed. + Function *AsanDtorFunction = Function::Create( + FunctionType::get(Type::getVoidTy(*C), false), + GlobalValue::InternalLinkage, kAsanModuleDtorName, &M); + BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); + IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB)); + Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction( + kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage); + + IRB_Dtor.CreateCall2(AsanUnregisterGlobals, + IRB.CreatePointerCast(AllGlobals, IntptrTy), + ConstantInt::get(IntptrTy, n)); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority); + DEBUG(dbgs() << M); return true; } @@ -631,7 +655,7 @@ bool AddressSanitizer::runOnModule(Module &M) { Res |= handleFunction(M, *F); } - appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); return Res; } @@ -951,8 +975,8 @@ BlackList::BlackList(const std::string &Path) { OwningPtr<MemoryBuffer> File; if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) { - errs() << EC.message(); - exit(1); + report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " + + EC.message()); } MemoryBuffer *Buff = File.take(); const char *Data = Buff->getBufferStart(); @@ -962,15 +986,23 @@ BlackList::BlackList(const std::string &Path) { for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { if (Lines[i].startswith(kFunPrefix)) { std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - if (Fun.size()) { - Fun += "|"; - } + std::string ThisFuncRE; // add ThisFunc replacing * with .* for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { if (ThisFunc[j] == '*') - Fun += '.'; - Fun += ThisFunc[j]; + ThisFuncRE += '.'; + ThisFuncRE += ThisFunc[j]; } + // Check that the regexp is valid. + Regex CheckRE(ThisFuncRE); + std::string Error; + if (!CheckRE.isValid(Error)) + report_fatal_error("malformed blacklist regex: " + ThisFunc + + ": " + Error); + // Append to the final regexp. 
+ if (Fun.size()) + Fun += "|"; + Fun += ThisFuncRE; } } if (Fun.size()) { diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 929b7cd..a4a1fef 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -7,10 +7,3 @@ add_llvm_library(LLVMInstrumentation PathProfiling.cpp ProfilingUtils.cpp ) - -add_llvm_library_dependencies(LLVMInstrumentation - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTransformUtils - ) diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index ccf7e11..96e5d5b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -43,12 +43,14 @@ namespace { public: static char ID; GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) { + : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), + UseExtraChecksum(false) { initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false) + GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false, + bool useExtraChecksum = false) : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(use402Format) { + Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) { assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } @@ -94,6 +96,7 @@ namespace { bool EmitNotes; bool EmitData; bool Use402Format; + bool UseExtraChecksum; Module *M; LLVMContext *Ctx; @@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format); + bool Use402Format, + bool UseExtraChecksum) { + return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum); } namespace { @@ -167,7 +171,7 @@ namespace { } uint32_t length() { - // Here 2 = 1 for string lenght + 1 for '0' id#. + // Here 2 = 1 for string length + 1 for '0' id#. return lengthOfGCOVString(Filename) + 2 + Lines.size(); } @@ -244,10 +248,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) { + GCOVFunction(DISubprogram SP, raw_ostream *os, + bool Use402Format, bool UseExtraChecksum) { this->os = os; Function *F = SP.getFunction(); + DEBUG(dbgs() << "Function: " << F->getName() << "\n"); uint32_t i = 0; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { Blocks[BB] = new GCOVBlock(i++, os); @@ -257,14 +263,14 @@ namespace { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (!Use402Format) - ++BlockLen; // For second checksum. 
+ if (UseExtraChecksum) + ++BlockLen; write(BlockLen); uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); - write(0); // checksum #1 - if (!Use402Format) - write(0); // checksum #2 + write(0); // lineno checksum + if (UseExtraChecksum) + write(0); // cfg checksum writeGCOVString(SP.getName()); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); @@ -290,6 +296,7 @@ namespace { for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { write(0); // No flags on our blocks. } + DEBUG(dbgs() << Blocks.size() << " blocks.\n"); // Emit edges between blocks. for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(), @@ -301,6 +308,8 @@ namespace { write(Block.OutEdges.size() * 2 + 1); write(Block.Number); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number + << "\n"); write(Block.OutEdges[i]->Number); write(0); // no flags } @@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) { } void GCOVProfiler::emitGCNO() { - DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles; NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (CU_Nodes) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - // Each compile unit gets its own .gcno file. This means that whether we run - // this pass over the original .o's as they're produced, or run it after - // LTO, we'll generate the same .gcno files. - - DICompileUnit CU(CU_Nodes->getOperand(i)); - raw_fd_ostream *&out = GcnoFiles[CU]; - std::string ErrorInfo; - out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo, - raw_fd_ostream::F_Binary); - if (!Use402Format) - out->write("oncg*404MVLL", 12); - else - out->write("oncg*204MVLL", 12); - - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { - DISubprogram SP(SPs.getElement(i)); - if (!SP.Verify()) continue; - raw_fd_ostream *&os = GcnoFiles[CU]; - - Function *F = SP.getFunction(); - if (!F) continue; - GCOVFunction Func(SP, os, Use402Format); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); - TerminatorInst *TI = BB->getTerminator(); - if (int successors = TI->getNumSuccessors()) { - for (int i = 0; i != successors; ++i) { - Block.addEdge(Func.getBlock(TI->getSuccessor(i))); - } - } else if (isa<ReturnInst>(TI)) { - Block.addEdge(Func.getReturnBlock()); - } - - uint32_t Line = 0; - for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { - const DebugLoc &Loc = I->getDebugLoc(); - if (Loc.isUnknown()) continue; - if (Line == Loc.getLine()) continue; - Line = Loc.getLine(); - if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; - - GCOVLines &Lines = Block.getFile(SP.getFilename()); - Lines.addLine(Loc.getLine()); + if (!CU_Nodes) return; + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. 
+ + DICompileUnit CU(CU_Nodes->getOperand(i)); + std::string ErrorInfo; + raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, + raw_fd_ostream::F_Binary); + if (!Use402Format) + out.write("oncg*404MVLL", 12); + else + out.write("oncg*204MVLL", 12); + + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (!SP.Verify()) continue; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); } + } else if (isa<ReturnInst>(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); + I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); } - Func.writeOut(); } + Func.writeOut(); } - } - - for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator - I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { - raw_fd_ostream *&out = I->second; - out->write("\0\0\0\0\0\0\0\0", 8); // EOF - out->close(); - delete out; + out.write("\0\0\0\0\0\0\0\0", 8); // EOF + out.close(); } } diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt index f302d03..d36ad54 100644 --- a/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = Instrumentation parent = Transforms required_libraries = Analysis Core Support TransformUtils - diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index d36b898..b2ef49a 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -15,8 +15,10 @@ ; ;===------------------------------------------------------------------------===; +[common] +subdirectories = IPO InstCombine Instrumentation Scalar Utils + [component_0] type = Group name = Transforms parent = Libraries - diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index a6f0cf3..d660c72 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -32,12 +32,3 @@ add_llvm_library(LLVMScalarOpts Sink.cpp TailRecursionElimination.cpp ) - -add_llvm_library_dependencies(LLVMScalarOpts - LLVMAnalysis - LLVMCore - LLVMInstCombine - LLVMSupport - LLVMTarget - LLVMTransformUtils - ) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index f8f18b2..f9abfe9 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -69,6 +70,7 @@ namespace { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// 
transformation profitability. const TargetLowering *TLI; + const TargetLibraryInfo *TLInfo; DominatorTree *DT; ProfileInfo *PFI; @@ -97,6 +99,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); + AU.addRequired<TargetLibraryInfo>(); } private: @@ -116,7 +119,10 @@ namespace { } char CodeGenPrepare::ID = 0; -INITIALIZE_PASS(CodeGenPrepare, "codegenprepare", +INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare", "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { @@ -127,6 +133,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; ModifiedDT = false; + TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); @@ -542,7 +549,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { WeakVH IterHandle(CurInstIterator); ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, - ModifiedDT ? 0 : DT); + TLInfo, ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 664c3f6..5430f62 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,6 +24,8 @@ #include "llvm/Constant.h" #include "llvm/Instruction.h" #include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" #include <set> @@ -42,19 +44,22 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfo>(); } }; } char ConstantPropagation::ID = 0; -INITIALIZE_PASS(ConstantPropagation, "constprop", +INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop", + "Simple constant propagation", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(ConstantPropagation, "constprop", "Simple constant propagation", false, false) FunctionPass *llvm::createConstantPropagationPass() { return new ConstantPropagation(); } - bool ConstantPropagation::runOnFunction(Function &F) { // Initialize the worklist to all of the instructions ready to process... std::set<Instruction*> WorkList; @@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) { WorkList.insert(&*i); } bool Changed = false; + TargetData *TD = getAnalysisIfAvailable<TargetData>(); + TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); while (!WorkList.empty()) { Instruction *I = *WorkList.begin(); WorkList.erase(WorkList.begin()); // Get an element from the worklist... if (!I->use_empty()) // Don't muck with dead instructions... - if (Constant *C = ConstantFoldInstruction(I)) { + if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) { // Add all of the users of this instruction to the worklist, they might // be constant propagatable now... 
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f5688cb..8729019 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -416,7 +416,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // writes to addresses which will definitely be overwritten later if (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) && - LaterOff + Later.Size >= EarlierOff + Earlier.Size) + int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)) return OverwriteEnd; // Otherwise, they don't completely overlap. @@ -624,6 +624,7 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks, BasicBlock *BB, DominatorTree *DT) { for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { BasicBlock *Pred = *I; + if (Pred == BB) continue; TerminatorInst *PredTI = Pred->getTerminator(); if (PredTI->getNumSuccessors() != 1) continue; @@ -853,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc, I != E; ++I) DeadStackObjects.erase(*I); } - diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index c0223d2..5241e11 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" @@ -215,6 +216,7 @@ namespace { class EarlyCSE : public FunctionPass { public: const TargetData *TD; + const TargetLibraryInfo *TLI; DominatorTree *DT; typedef RecyclingAllocator<BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy; @@ -263,6 +265,7 @@ private: // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); AU.setPreservesCFG(); } }; @@ -277,6 +280,7 @@ FunctionPass *llvm::createEarlyCSEPass() { INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false) bool EarlyCSE::processNode(DomTreeNode *Node) { @@ -328,7 +332,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // If the instruction can be simplified (e.g. X+0 = X) then replace it with // its simpler value. - if (Value *V = SimplifyInstruction(Inst, TD, DT)) { + if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) { DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n'); Inst->replaceAllUsesWith(V); Inst->eraseFromParent(); @@ -455,6 +459,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { bool EarlyCSE::runOnFunction(Function &F) { TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); DT = &getAnalysis<DominatorTree>(); // Tables that the pass uses when walking the domtree. 
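One easy-to-miss fix above: in DeadStoreElimination's isOverwrite, Later.Size and Earlier.Size are unsigned, so the old comparison promoted both sides to unsigned and a negative sum compared as a huge positive value. The added int64_t casts keep the whole test signed. A standalone illustration (the offsets are invented for the sketch, chosen to make the promotion visible):

#include <cstdint>
#include <cstdio>

int main() {
  // Offsets are relative to the underlying object; sizes are unsigned.
  int64_t EarlierOff = -16, LaterOff = -8;   // earlier store: [-16, 0)
  uint64_t EarlierSize = 16, LaterSize = 4;  // later store:   [-8, -4)

  // Old form: int64_t + uint64_t promotes to uint64_t, so -4 becomes
  // 2^64 - 4 and the test passes even though the later store stops short
  // of the earlier store's end.
  bool old_result = LaterOff + LaterSize >= EarlierOff + EarlierSize;

  // Fixed form: both sums are compared as signed 64-bit values.
  bool new_result = int64_t(LaterOff + LaterSize) >=
                    int64_t(EarlierOff + EarlierSize);

  printf("old=%d fixed=%d\n", old_result, new_result);  // old=1 fixed=0
  return 0;
}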
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a51cbb6..374fdd7 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -31,6 +31,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" @@ -446,7 +447,8 @@ namespace { MemoryDependenceAnalysis *MD; DominatorTree *DT; const TargetData *TD; - + const TargetLibraryInfo *TLI; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -530,6 +532,7 @@ namespace { // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); + AU.addRequired<TargetLibraryInfo>(); if (!NoLoads) AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<AliasAnalysis>(); @@ -568,6 +571,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) { INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) @@ -2032,7 +2036,7 @@ bool GVN::processInstruction(Instruction *I) { // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify. - if (Value *V = SimplifyInstruction(I, TD, DT)) { + if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) { I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); @@ -2134,6 +2138,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>()); VN.setMemDep(MD); VN.setDomTree(DT); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 0772b48..ad8689a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -182,7 +182,7 @@ bool GlobalMerge::doInitialization(Module &M) { continue; // Ignore fancy-aligned globals for now. - unsigned Alignment = I->getAlignment(); + unsigned Alignment = TD->getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); if (Alignment > TD->getABITypeAlignment(Ty)) continue; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 1f21108..6d52b22 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -58,18 +58,16 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -namespace llvm { - cl::opt<bool> EnableIVRewrite( - "enable-iv-rewrite", cl::Hidden, - cl::desc("Enable canonical induction variable rewriting")); - - // Trip count verification can be enabled by default under NDEBUG if we - // implement a strong expression equivalence checker in SCEV. 
Until then, we - // use the verify-indvars flag, which may assert in some cases. - cl::opt<bool> VerifyIndvars( - "verify-indvars", cl::Hidden, - cl::desc("Verify the ScalarEvolution result after running indvars")); -} +static cl::opt<bool> EnableIVRewrite( + "enable-iv-rewrite", cl::Hidden, + cl::desc("Enable canonical induction variable rewriting")); + +// Trip count verification can be enabled by default under NDEBUG if we +// implement a strong expression equivalence checker in SCEV. Until then, we +// use the verify-indvars flag, which may assert in some cases. +static cl::opt<bool> VerifyIndvars( + "verify-indvars", cl::Hidden, + cl::desc("Verify the ScalarEvolution result after running indvars")); namespace { class IndVarSimplify : public LoopPass { @@ -180,6 +178,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // base of a recurrence. This handles the case in which SCEV expansion // converts a pointer type recurrence into a nonrecurrent pointer base // indexed by an integer recurrence. + + // If the GEP base pointer is a vector of pointers, abort. + if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy()) + return false; + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); if (FromBase == ToBase) @@ -946,9 +949,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { else return 0; + // When creating this AddExpr, don't apply the current operations NSW or NUW + // flags. This instruction may be guarded by control flow that the no-wrap + // behavior depends on. Non-control-equivalent instructions can be mapped to + // the same SCEV expression, and it would be incorrect to transfer NSW/NUW + // semantics to those operations. const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>( - SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr, - IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW)); + SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) return 0; @@ -1231,7 +1238,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, /// BackedgeTakenInfo. If these expressions have not been reduced, then /// expanding them may incur additional cost (albeit in the loop preheader). static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, + SmallPtrSet<const SCEV*, 8> &Processed, ScalarEvolution *SE) { + if (!Processed.insert(S)) + return false; + // If the backedge-taken count is a UDiv, it's very likely a UDiv that // ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a // precise expression, rather than a UDiv from the user's code. 
If we can't @@ -1259,7 +1270,7 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) { - if (isHighCostExpansion(*I, BI, SE)) + if (isHighCostExpansion(*I, BI, Processed, SE)) return true; } return false; @@ -1302,7 +1313,8 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { if (!BI) return false; - if (isHighCostExpansion(BackedgeTakenCount, BI, SE)) + SmallPtrSet<const SCEV*, 8> Processed; + if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE)) return false; return true; diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index f410af3..c78db3f 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" @@ -75,6 +76,7 @@ namespace { /// class JumpThreading : public FunctionPass { TargetData *TD; + TargetLibraryInfo *TLI; LazyValueInfo *LVI; #ifdef NDEBUG SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -107,6 +109,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LazyValueInfo>(); AU.addPreserved<LazyValueInfo>(); + AU.addRequired<TargetLibraryInfo>(); } void FindLoopHeaders(Function &F); @@ -133,6 +136,7 @@ char JumpThreading::ID = 0; INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading", "Jump Threading", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfo) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) @@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); } bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); FindLoopHeaders(F); @@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // Run constant folding to see if we can reduce the condition to a simple // constant. if (Instruction *I = dyn_cast<Instruction>(Condition)) { - Value *SimpleVal = ConstantFoldInstruction(I, TD); + Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI); if (SimpleVal) { I->replaceAllUsesWith(SimpleVal); I->eraseFromParent(); @@ -921,8 +926,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Split them out to their own block. UnavailablePred = - SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(), - "thread-pre-split", this); + SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this); } // If the value isn't available in all predecessors, then there will be @@ -1334,8 +1338,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // And finally, do it! 
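Stepping back to the IndVarSimplify hunks above: threading a Processed set through isHighCostExpansion turns an unguarded recursion over a SCEV expression into a visited-set walk, so shared subexpressions are analyzed once rather than once per path through the DAG. A self-contained toy showing the difference (plain std::set and a dummy node type stand in for SmallPtrSet and SCEV):

#include <cstdio>
#include <set>

struct Node { const Node *L, *R; };

// Unguarded recursion: visits each node once per path reaching it.
static int visitAll(const Node *N) {
  if (!N) return 0;
  return 1 + visitAll(N->L) + visitAll(N->R);
}

// Visited-set walk: each node is processed at most once.
static int visitOnce(const Node *N, std::set<const Node *> &Processed) {
  if (!N || !Processed.insert(N).second) return 0;  // already processed
  return 1 + visitOnce(N->L, Processed) + visitOnce(N->R, Processed);
}

int main() {
  // A "ladder" DAG: both children of node i point at node i+1.
  Node Nodes[20] = {};
  for (int i = 0; i < 19; ++i)
    Nodes[i].L = Nodes[i].R = &Nodes[i + 1];

  std::set<const Node *> Processed;
  printf("unguarded: %d visits\n", visitAll(&Nodes[0]));             // 2^20 - 1
  printf("guarded:   %d visits\n", visitOnce(&Nodes[0], Processed)); // 20
  return 0;
}

(In the patch itself, SmallPtrSet::insert returns a plain bool in this era, hence the "if (!Processed.insert(S))" form.)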
@@ -1479,8 +1482,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(), - ".thr_comm", this); + PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this); } // Okay, we decided to do this! Clone all the instructions in BB onto the end diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 8098b36..8795cd8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -43,8 +43,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -84,6 +87,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved("scalar-evolution"); AU.addPreservedID(LoopSimplifyID); + AU.addRequired<TargetLibraryInfo>(); } bool doFinalization() { @@ -96,6 +100,9 @@ namespace { LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. + TargetData *TD; // TargetData for constant folding. + TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. + // State that is updated as we process loops. bool Changed; // Set to true when we change anything. BasicBlock *Preheader; // The preheader block of the current loop... @@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) @@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { AA = &getAnalysis<AliasAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TD = getAnalysisIfAvailable<TargetData>(); + TLI = &getAnalysis<TargetLibraryInfo>(); + CurAST = new AliasSetTracker(*AA); // Collect Alias info from subloops. for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end(); @@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) { // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to just // fold it. - if (Constant *C = ConstantFoldInstruction(&I)) { + if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) { DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); CurAST->copyValue(&I, C); CurAST->deleteValue(&I); @@ -369,7 +380,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load"))) + if (LI->getMetadata("invariant.load")) return true; // Don't hoist loads which have may-aliased stores in loop. @@ -581,7 +592,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (Inst.isSafeToSpeculativelyExecute()) + if (isSafeToSpeculativelyExecute(&Inst)) return true; return isGuaranteedToExecute(Inst); diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt index 027634d..cee9119 100644 --- a/lib/Transforms/Scalar/LLVMBuild.txt +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -21,4 +21,3 @@ name = Scalar parent = Transforms library_name = ScalarOpts required_libraries = Analysis Core InstCombine Support Target TransformUtils - diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index af25c5c..f0f05e6 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/Statistic.h" @@ -43,6 +44,7 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); AU.addPreserved("scalar-evolution"); + AU.addRequired<TargetLibraryInfo>(); } }; } @@ -50,6 +52,7 @@ namespace { char LoopInstSimplify::ID = 0; INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify", "Simplify instructions in loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); LoopInfo *LI = &getAnalysis<LoopInfo>(); const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallVector<BasicBlock*, 8> ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Don't bother simplifying unused instructions. if (!I->use_empty()) { - Value *V = SimplifyInstruction(I, TD, DT); + Value *V = SimplifyInstruction(I, TD, TLI, DT); if (V && LI->replacementPreservesLCSSAForm(I, V)) { // Mark all uses for resimplification next time round the loop. for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4ae51d5..840614e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -77,19 +77,17 @@ #include <algorithm> using namespace llvm; -namespace llvm { -cl::opt<bool> EnableNested( +static cl::opt<bool> EnableNested( "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); -cl::opt<bool> EnableRetry( - "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); +static cl::opt<bool> EnableRetry( + "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); // Temporary flag to cleanup congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or // not. The flag should be removed after the v3.0 release. 
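The LSR flags above drop their external llvm-namespace linkage in favor of internal linkage; a generic sketch of the resulting pattern (EnableFoo and "enable-foo" are placeholder names):

  static cl::opt<bool> EnableFoo(
    "enable-foo", cl::Hidden, cl::desc("Enable foo"));

Declaring the option static keeps the flag object file-local, so no other translation unit can reference it directly.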
-cl::opt<bool> EnablePhiElim( - "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); -} +static cl::opt<bool> EnablePhiElim( + "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination")); namespace { @@ -636,6 +634,19 @@ static Type *getAccessType(const Instruction *Inst) { return AccessTy; } +/// isExistingPhi - Return true if this AddRec is already a phi in its loop. +static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return true; + } + return false; +} + /// DeleteTriviallyDeadInstructions - If any of the instructions is the /// specified set are trivially dead, delete them and see if this makes any of /// their operands subsequently dead. @@ -705,7 +716,8 @@ public: const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); void print(raw_ostream &OS) const; void dump() const; @@ -718,7 +730,8 @@ private: void RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT); + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs); }; } @@ -738,18 +751,13 @@ void Cost::RateRegister(const SCEV *Reg, // on other loops, and cannot be expected to change sibling loops. If the // AddRec exists, consider it's register free and leave it alone. Otherwise, // do not consider this formula at all. - // FIXME: why do we need to generate such fomulae? else if (!EnableNested || L->contains(AR->getLoop()) || (!AR->getLoop()->contains(L) && DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { - for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (SE.isSCEVable(PN->getType()) && - (SE.getEffectiveSCEVType(PN->getType()) == - SE.getEffectiveSCEVType(AR->getType())) && - SE.getSCEV(PN) == AR) - return; - } + if (isExistingPhi(AR, SE)) + return; + + // For !EnableNested, never rewrite IVs in other loops. if (!EnableNested) { Loose(); return; @@ -791,13 +799,22 @@ void Cost::RateRegister(const SCEV *Reg, } /// RatePrimaryRegister - Record this register in the set. If we haven't seen it -/// before, rate it. +/// before, rate it. Optional LoserRegs provides a way to declare any formula +/// that refers to one of those regs an instant loser. void Cost::RatePrimaryRegister(const SCEV *Reg, SmallPtrSet<const SCEV *, 16> &Regs, const Loop *L, - ScalarEvolution &SE, DominatorTree &DT) { - if (Regs.insert(Reg)) + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { + if (LoserRegs && LoserRegs->count(Reg)) { + Loose(); + return; + } + if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); + if (isLoser()) + LoserRegs->insert(Reg); + } } void Cost::RateFormula(const Formula &F, @@ -805,14 +822,15 @@ void Cost::RateFormula(const Formula &F, const DenseSet<const SCEV *> &VisitedRegs, const Loop *L, const SmallVectorImpl<int64_t> &Offsets, - ScalarEvolution &SE, DominatorTree &DT) { + ScalarEvolution &SE, DominatorTree &DT, + SmallPtrSet<const SCEV *, 16> *LoserRegs) { // Tally up the registers. 
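  // LoserRegs memoizes registers that are already known to make any formula
  // an instant loser, so later formulae touching one are rejected without
  // being re-rated. The mechanism, condensed from RatePrimaryRegister above:
  //   if (LoserRegs && LoserRegs->count(Reg)) { Loose(); return; }  // known bad
  //   if (Regs.insert(Reg)) {
  //     RateRegister(Reg, Regs, L, SE, DT);
  //     if (isLoser()) LoserRegs->insert(Reg);  // remember the culprit
  //   }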
if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { Loose(); return; } - RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -823,7 +841,7 @@ void Cost::RateFormula(const Formula &F, Loose(); return; } - RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs); if (isLoser()) return; } @@ -1105,7 +1123,6 @@ bool LSRUse::InsertFormula(const Formula &F) { Formulae.push_back(F); // Record registers now being used by this use. - if (F.ScaledReg) Regs.insert(F.ScaledReg); Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); return true; @@ -1116,7 +1133,6 @@ void LSRUse::DeleteFormula(Formula &F) { if (&F != &Formulae.back()) std::swap(F, Formulae.back()); Formulae.pop_back(); - assert(!Formulae.empty() && "LSRUse has no formulae left!"); } /// RecomputeRegs - Recompute the Regs field, and update RegUses. @@ -1389,7 +1405,6 @@ class LSRInstance { LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); -public: void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); void CountRegisters(const Formula &F, size_t LUIdx); @@ -1450,6 +1465,7 @@ public: void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Pass *P); +public: LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); bool getChanged() const { return Changed; } @@ -2045,7 +2061,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() { do { const SCEV *S = Worklist.pop_back_val(); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - Strides.insert(AR->getStepRecurrence(SE)); + if (EnableNested || AR->getLoop() == L) + Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { Worklist.append(Add->op_begin(), Add->op_end()); @@ -2914,6 +2931,7 @@ LSRInstance::GenerateAllReuseFormulae() { void LSRInstance::FilterOutUndesirableDedicatedRegisters() { DenseSet<const SCEV *> VisitedRegs; SmallPtrSet<const SCEV *, 16> Regs; + SmallPtrSet<const SCEV *, 16> LoserRegs; #ifndef NDEBUG bool ChangedFormulae = false; #endif @@ -2933,46 +2951,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { FIdx != NumForms; ++FIdx) { Formula &F = LU.Formulae[FIdx]; - SmallVector<const SCEV *, 2> Key; - for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), - JE = F.BaseRegs.end(); J != JE; ++J) { - const SCEV *Reg = *J; - if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) - Key.push_back(Reg); + // Some formulas are instant losers. For example, they may depend on + // nonexistent AddRecs from other loops. These need to be filtered + // immediately, otherwise heuristics could choose them over others leading + // to an unsatisfactory solution. Passing LoserRegs into RateFormula here + // avoids the need to recompute this information across formulae using the + // same bad AddRec. Passing LoserRegs is also essential unless we remove + // the corresponding bad register from the Regs set. + Cost CostF; + Regs.clear(); + CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, + &LoserRegs); + if (CostF.isLoser()) { + // During initial formula generation, undesirable formulae are generated + // by uses within other loops that have some non-trivial address mode or + // use the postinc form of the IV. 
LSR needs to provide these formulae + // as the basis of rediscovering the desired formula that uses an AddRec + // corresponding to the existing phi. Once all formulae have been + // generated, these initial losers may be pruned. + DEBUG(dbgs() << " Filtering loser "; F.print(dbgs()); + dbgs() << "\n"); } - if (F.ScaledReg && - RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) - Key.push_back(F.ScaledReg); - // Unstable sort by host order ok, because this is only used for - // uniquifying. - std::sort(Key.begin(), Key.end()); - - std::pair<BestFormulaeTy::const_iterator, bool> P = - BestFormulae.insert(std::make_pair(Key, FIdx)); - if (!P.second) { + else { + SmallVector<const SCEV *, 2> Key; + for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) + Key.push_back(Reg); + } + if (F.ScaledReg && + RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) + Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for + // uniquifying. + std::sort(Key.begin(), Key.end()); + + std::pair<BestFormulaeTy::const_iterator, bool> P = + BestFormulae.insert(std::make_pair(Key, FIdx)); + if (P.second) + continue; + Formula &Best = LU.Formulae[P.first->second]; - Cost CostF; - CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT); - Regs.clear(); Cost CostBest; - CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); Regs.clear(); + CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT); if (CostF < CostBest) std::swap(F, Best); DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); dbgs() << "\n" " in favor of formula "; Best.print(dbgs()); dbgs() << '\n'); + } #ifndef NDEBUG - ChangedFormulae = true; + ChangedFormulae = true; #endif - LU.DeleteFormula(F); - --FIdx; - --NumForms; - Any = true; - continue; - } + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; } // Now that we've filtered out some formulae, recompute the Regs set. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 37f4c2c..22dbfe3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); -// Temporary flag to be removed in 3.0 static cl::opt<bool> -NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden, - cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling")); +UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, + cl::desc("Unroll loops with run-time trip counts")); namespace { class LoopUnroll : public LoopPass { @@ -68,6 +67,10 @@ namespace { // explicit -unroll-threshold). static const unsigned OptSizeUnrollThreshold = 50; + // Default unroll count for loops with run-time trip count if + // -unroll-count is not set + static const unsigned UnrollRuntimeCount = 8; + unsigned CurrentCount; unsigned CurrentThreshold; bool CurrentAllowPartial; @@ -148,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Find trip count and trip multiple if count is not available unsigned TripCount = 0; unsigned TripMultiple = 1; - if (!NoSCEVUnroll) { - // Find "latch trip count". 
UnrollLoop assumes that control cannot exit - // via the loop latch on any iteration prior to TripCount. The loop may exit - // early via an earlier branch. - BasicBlock *LatchBlock = L->getLoopLatch(); - if (LatchBlock) { - TripCount = SE->getSmallConstantTripCount(L, LatchBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); - } - } - else { - TripCount = L->getSmallConstantTripCount(); - if (TripCount == 0) - TripMultiple = L->getSmallConstantTripMultiple(); + // Find "latch trip count". UnrollLoop assumes that control cannot exit + // via the loop latch on any iteration prior to TripCount. The loop may exit + // early via an earlier branch. + BasicBlock *LatchBlock = L->getLoopLatch(); + if (LatchBlock) { + TripCount = SE->getSmallConstantTripCount(L, LatchBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } - // Automatically select an unroll count. + // Use a default unroll-count if the user doesn't specify a value + // and the trip count is a run-time value. The default is different + // for run-time or compile-time trip count loops. unsigned Count = CurrentCount; + if (UnrollRuntime && CurrentCount == 0 && TripCount == 0) + Count = UnrollRuntimeCount; + if (Count == 0) { // Conservative heuristic: if we know the trip count, see if we can // completely unroll (subject to the threshold, checked below); otherwise @@ -189,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count << " because size: " << Size << ">" << Threshold << "\n"); - if (!CurrentAllowPartial) { + if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = Threshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) { - Count--; + if (TripCount) { + // Reduce unroll count to be modulo of TripCount for partial unrolling + Count = CurrentThreshold / LoopSize; + while (Count != 0 && TripCount%Count != 0) + Count--; + } + else if (UnrollRuntime) { + // Reduce unroll count to be a lower power-of-two value + while (Count != 0 && Size > CurrentThreshold) { + Count >>= 1; + Size = LoopSize*Count; + } } if (Count < 2) { DEBUG(dbgs() << " could not unroll partially\n"); @@ -208,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM)) + if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM)) return false; return true; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 458949c..a2d0e98 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -71,7 +71,9 @@ namespace { // LoopProcessWorklist - Used to check if second loop needs processing // after RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector<Loop*> LoopProcessWorklist; - SmallPtrSet<Value *,8> UnswitchedVals; + + // FIXME: Consider custom class for this. + std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals; bool OptimizeForSize; bool redoLoop; @@ -117,7 +119,15 @@ namespace { private: virtual void releaseMemory() { - UnswitchedVals.clear(); + // We need to forget about all switches in the current loop. 
+    // FIXME: Do it better than enumerating all blocks of code
+    // and see if it is a switch instruction.
+    for (Loop::block_iterator I = currentLoop->block_begin(),
+         E = currentLoop->block_end(); I != E; ++I) {
+      SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+      if (SI)
+        UnswitchedVals.erase(SI);
+    }
    }

    /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
      if (I != LoopProcessWorklist.end())
        LoopProcessWorklist.erase(I);
    }
+
+    /// For new loop switches we clone info about values that were
+    /// already unswitched and have redundant successors.
+    /// Note that the new loop data is stored inside the VMap.
+    void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+                             const BasicBlock* SrcBB);

    void initLoopData() {
      loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
                                             currentLoop, Changed);
-      if (LoopCond && SI->getNumCases() > 1) {
+      unsigned NumCases = SI->getNumCases();
+      if (LoopCond && NumCases > 1) {
        // Find a value to unswitch on:
        // FIXME: this should chose the most expensive case!
        // FIXME: scan for a case with a non-critical edge?
-        Constant *UnswitchVal = SI->getCaseValue(1);
+        Constant *UnswitchVal = NULL;
+
        // Do not process same value again and again.
-        if (!UnswitchedVals.insert(UnswitchVal))
+        // At this point we have some cases already unswitched and some not
+        // yet unswitched. Let's find the first one that is not unswitched yet.
+        for (unsigned i = 1; i < NumCases; ++i) {
+          Constant* UnswitchValCandidate = SI->getCaseValue(i);
+          if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+            UnswitchVal = UnswitchValCandidate;
+            break;
+          }
+        }
+
+        if (!UnswitchVal)
          continue;

        if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
  return Changed;
}

+/// For new loop switches we clone info about values that were
+/// already unswitched and have redundant successors.
+/// Note that the new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+                                       const BasicBlock* SrcBB) {
+
+  const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+  if (SI && UnswitchedVals.count(SI)) {
+    // Don't clone a totally simplified switch.
+    if (isa<Constant>(SI->getCondition()))
+      return;
+    Value* I = VMap.lookup(SI);
+    assert(I && "All instructions that are in SrcBB must be in VMap.");
+    UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+  }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
    // Check to see if a successor of the switch is guaranteed to go to the
    // latch block or exit through a one exit block without having any
    // side-effects. If so, determine the value of Cond that causes it to do
-    // this. Note that we can't trivially unswitch on the default case.
-    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
-      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+    // this.
+    // Note that we can't trivially unswitch on the default case or
+    // on already unswitched cases.
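    // For reference, the bookkeeping consulted here is the per-switch map
    // declared earlier in this file:
    //   std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
    // RewriteLoopBodyWithConditionConstant records every value it folds away
    // (UnswitchedVals[SI].insert(Val) below), and both the trivial and the
    // profitable unswitching paths check the set so no case value is
    // processed twice.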
+    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+      BasicBlock* LoopExitCandidate;
+      if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
                                               SI->getSuccessor(i)))) {
        // Okay, we found a trivial case, remember the value that is trivial.
-        if (Val) *Val = SI->getCaseValue(i);
+        ConstantInt* CaseVal = SI->getCaseValue(i);
+
+        // Check that it was not unswitched before, since a trivial value
+        // that was already unswitched still looks trivial.
+        if (UnswitchedVals[SI].count(CaseVal))
+          continue;
+        LoopExitBB = LoopExitCandidate;
+        if (Val) *Val = CaseVal;
        break;
      }
+    }
  }

  // If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
  // expansion, and the number of basic blocks, to avoid loops with
  // large numbers of branches which cause loop unswitching to go crazy.
  // This is a very ad-hoc heuristic.
-  if (Metrics.NumInsts > Threshold ||
-      Metrics.NumBlocks * 5 > Threshold ||
+
+  unsigned NumUnswitched =
+    (NumSwitches + NumBranches) + 1 /*take into account the current iteration*/;
+
+  unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+  unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+  if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
      Metrics.containsIndirectBr || Metrics.isRecursive) {
    DEBUG(dbgs() << "NOT unswitching loop %"
          << currentLoop->getHeader()->getName() << ", cost too high: "
@@ -565,8 +627,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
    // Although SplitBlockPredecessors doesn't preserve loop-simplify in
    // general, if we call it on all predecessors of all exits then it does.
    if (!ExitBlock->isLandingPad()) {
-      SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
-                             ".us-lcssa", this);
+      SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
    } else {
      SmallVector<BasicBlock*, 2> NewBBs;
      SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -621,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+    // Inherit simplified switches info for NewBB.
+    // We needn't pass NewBB since its instructions are already contained
+    // inside the VMap.
+    CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -907,9 +974,13 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
      Instruction *U = dyn_cast<Instruction>(*UI);
      if (!U || !L->contains(U))
        continue;
-      U->replaceUsesOfWith(LIC, Replacement);
      Worklist.push_back(U);
    }
+
+    for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+         UI != Worklist.end(); ++UI)
+      (*UI)->replaceUsesOfWith(LIC, Replacement);
+
    SimplifyCode(Worklist, L);
    return;
  }
@@ -942,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
    BasicBlock *Switch = SI->getParent();
    BasicBlock *SISucc = SI->getSuccessor(DeadCase);
    BasicBlock *Latch = L->getLoopLatch();
+
+    UnswitchedVals[SI].insert(Val);
+
    if (!SI->findCaseDest(SISucc)) continue;  // Edge is critical.
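    // The split loop above (collect users first, rewrite afterwards) is the
    // standard guard against mutating a value's use list while iterating it;
    // a generic sketch of the pattern:
    //   SmallVector<Instruction*, 8> Worklist;
    //   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
    //        UI != E; ++UI)
    //     if (Instruction *U = dyn_cast<Instruction>(*UI))
    //       Worklist.push_back(U);                        // no mutation here
    //   for (unsigned i = 0, e = Worklist.size(); i != e; ++i)
    //     Worklist[i]->replaceUsesOfWith(V, Replacement); // safe to mutate now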
      // If the DeadCase successor dominates the loop latch, then the
      // transformation isn't safe since it will delete the sole predecessor edge
@@ -1017,7 +1091,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
      // See if instruction simplification can hack this up.  This is common for
      // things like "select false, X, Y" after unswitching made the condition be
      // 'false'.
-      if (Value *V = SimplifyInstruction(I, 0, DT))
+      if (Value *V = SimplifyInstruction(I, 0, 0, DT))
        if (LI->replacementPreservesLCSSAForm(I, V)) {
          ReplaceUsesOfWith(I, V, Worklist, L, LPM);
          continue;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 9e4f51f..7335626 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace

bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
-  // If we found more than 8 stores to merge or 64 bytes, use memset.
-  if (TheStores.size() >= 8 || End-Start >= 64) return true;
+  // If we found more than 4 stores to merge or 16 bytes, use memset.
+  if (TheStores.size() >= 4 || End-Start >= 16) return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2) return false;
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 80f5f01..8e9449f 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -179,9 +179,13 @@ static bool IsPotentialUse(const Value *Op) {
        Arg->hasNestAttr() ||
        Arg->hasStructRetAttr())
      return false;
-  // Only consider values with pointer types, and not function pointers.
+  // Only consider values with pointer types.
+  // It seems intuitive to exclude function pointer types as well, since
+  // functions are never reference-counted; however, clang occasionally
+  // bitcasts reference-counted pointers to function-pointer type
+  // temporarily.
  PointerType *Ty = dyn_cast<PointerType>(Op->getType());
-  if (!Ty || isa<FunctionType>(Ty->getElementType()))
+  if (!Ty)
    return false;
  // Conservatively assume anything else is a potential use.
  return true;
@@ -896,8 +900,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"

STATISTIC(NumNoops,        "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1165,6 +1170,7 @@ namespace {
    /// Partial - True of we've seen an opportunity for partial RR elimination,
    /// such as pushing calls into a CFG triangle or into one side of a
    /// CFG diamond.
+    /// TODO: Consider moving this to PtrState.
    bool Partial;

    /// ReleaseMetadata - If the Calls are objc_release calls and they all have
@@ -1251,16 +1257,6 @@ namespace {
      Seq = NewSeq;
    }

-    void SetSeqToRelease(MDNode *M) {
-      if (Seq == S_None || Seq == S_Use) {
-        Seq = M ? S_MovableRelease : S_Release;
-        RRI.ReleaseMetadata = M;
-      } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
-        Seq = S_Release;
-        RRI.ReleaseMetadata = 0;
-      }
-    }
-
    Sequence GetSeq() const {
      return Seq;
    }
@@ -1488,7 +1484,7 @@ namespace {
    /// metadata.
unsigned ImpreciseReleaseMDKind; - /// CopyOnEscape - The Metadata Kind for clang.arc.copy_on_escape + /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape /// metadata. unsigned CopyOnEscapeMDKind; @@ -2255,6 +2251,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } case S_CanRelease: { const Value *Arg = I->first; @@ -2289,6 +2286,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, // guards against loops in the middle of a sequence. if (SomeSuccHasSame && !AllSuccsHaveSame) S.ClearSequenceProgress(); + break; } } } @@ -2350,8 +2348,11 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) NestingDetected = true; - S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind)); S.RRI.clear(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); + S.RRI.ReleaseMetadata = ReleaseMetadata; S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); @@ -2494,18 +2495,16 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, if (Pred == BB) continue; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); // If we haven't seen this node yet, then we've found a CFG cycle. // Be optimistic here; it's CheckForCFGHazards' job detect trouble. - if (!I->second.isVisitedTopDown()) + if (I == BBStates.end() || !I->second.isVisitedTopDown()) continue; MyStates.InitFromPred(I->second); while (PI != PE) { Pred = *PI++; if (Pred != BB) { I = BBStates.find(Pred); - assert(I != BBStates.end()); - if (I->second.isVisitedTopDown()) + if (I == BBStates.end() || I->second.isVisitedTopDown()) MyStates.MergePred(I->second); } } @@ -2661,49 +2660,106 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, return NestingDetected; } -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - // Use reverse-postorder on the reverse CFG for bottom-up, because we - // magically know that loops will be well behaved, i.e. they won't repeatedly - // call retain on a single pointer without doing a release. We can't use - // ReversePostOrderTraversal here because we want to walk up from each - // function exit point. +static void +ComputePostOrders(Function &F, + SmallVectorImpl<BasicBlock *> &PostOrder, + SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) { + /// Backedges - Backedges detected in the DFS. These edges will be + /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be + /// traversed in the desired order. + DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges; + + /// Visited - The visited set, for doing DFS walks. SmallPtrSet<BasicBlock *, 16> Visited; - SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack; - SmallVector<BasicBlock *, 16> Order; + + // Do DFS, computing the PostOrder. 
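  // The same algorithm on a plain adjacency list, as a self-contained sketch
  // (std containers instead of LLVM types; illustrative only):
  //
  //   #include <set>
  //   #include <utility>
  //   #include <vector>
  //
  //   // Computes a DFS postorder of the graph rooted at 'entry' and records
  //   // backedges, i.e. edges into a node that is still on the DFS stack.
  //   void postOrder(int entry, const std::vector<std::vector<int> > &succ,
  //                  std::vector<int> &order,
  //                  std::set<std::pair<int, int> > &backedges) {
  //     std::vector<bool> visited(succ.size()), onStack(succ.size());
  //     std::vector<std::pair<int, unsigned> > stack(1, std::make_pair(entry, 0u));
  //     visited[entry] = onStack[entry] = true;
  //     while (!stack.empty()) {
  //       int bb = stack.back().first;
  //       if (stack.back().second < succ[bb].size()) {
  //         int s = succ[bb][stack.back().second++];
  //         if (!visited[s]) {
  //           visited[s] = onStack[s] = true;
  //           stack.push_back(std::make_pair(s, 0u));    // tree edge: descend
  //         } else if (onStack[s]) {
  //           backedges.insert(std::make_pair(bb, s));   // edge to an ancestor
  //         }
  //       } else {
  //         onStack[bb] = false;                         // all successors done
  //         order.push_back(bb);
  //         stack.pop_back();
  //       }
  //     }
  //   }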
+ SmallPtrSet<BasicBlock *, 16> OnStack; + SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; + BasicBlock *EntryBB = &F.getEntryBlock(); + SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB))); + Visited.insert(EntryBB); + OnStack.insert(EntryBB); + do { + dfs_next_succ: + succ_iterator End = succ_end(SuccStack.back().first); + while (SuccStack.back().second != End) { + BasicBlock *BB = *SuccStack.back().second++; + if (Visited.insert(BB)) { + SuccStack.push_back(std::make_pair(BB, succ_begin(BB))); + OnStack.insert(BB); + goto dfs_next_succ; + } + if (OnStack.count(BB)) + Backedges.insert(std::make_pair(SuccStack.back().first, BB)); + } + OnStack.erase(SuccStack.back().first); + PostOrder.push_back(SuccStack.pop_back_val().first); + } while (!SuccStack.empty()); + + Visited.clear(); + + // Compute the exits, which are the starting points for reverse-CFG DFS. + SmallVector<BasicBlock *, 4> Exits; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *BB = I; if (BB->getTerminator()->getNumSuccessors() == 0) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); + Exits.push_back(BB); } - while (!Stack.empty()) { - pred_iterator End = pred_end(Stack.back().first); - while (Stack.back().second != End) { - BasicBlock *BB = *Stack.back().second++; - if (Visited.insert(BB)) - Stack.push_back(std::make_pair(BB, pred_begin(BB))); - } - Order.push_back(Stack.pop_back_val().first); + + // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. + SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack; + for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end(); + I != E; ++I) { + BasicBlock *ExitBB = *I; + PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB))); + Visited.insert(ExitBB); + while (!PredStack.empty()) { + reverse_dfs_next_succ: + pred_iterator End = pred_end(PredStack.back().first); + while (PredStack.back().second != End) { + BasicBlock *BB = *PredStack.back().second++; + // Skip backedges detected in the forward-CFG DFS. + if (Backedges.count(std::make_pair(BB, PredStack.back().first))) + continue; + if (Visited.insert(BB)) { + PredStack.push_back(std::make_pair(BB, pred_begin(BB))); + goto reverse_dfs_next_succ; + } + } + ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); + } } +} + +// Visit - Visit the function both top-down and bottom-up. +bool +ObjCARCOpt::Visit(Function &F, + DenseMap<const BasicBlock *, BBState> &BBStates, + MapVector<Value *, RRInfo> &Retains, + DenseMap<Value *, RRInfo> &Releases) { + + // Use reverse-postorder traversals, because we magically know that loops + // will be well behaved, i.e. they won't repeatedly call retain on a single + // pointer without doing a release. We can't use the ReversePostOrderTraversal + // class here because we want the reverse-CFG postorder to consider each + // function exit point, and we want to ignore selected cycle edges. + SmallVector<BasicBlock *, 16> PostOrder; + SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; + ComputePostOrders(F, PostOrder, ReverseCFGPostOrder); + + // Use reverse-postorder on the reverse CFG for bottom-up. 
bool BottomUpNestingDetected = false; for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - Order.rbegin(), E = Order.rend(); I != E; ++I) { - BasicBlock *BB = *I; - BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains); - } + ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); + I != E; ++I) + BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - // Use regular reverse-postorder for top-down. + // Use reverse-postorder for top-down. bool TopDownNestingDetected = false; - typedef ReversePostOrderTraversal<Function *> RPOTType; - RPOTType RPOT(&F); - for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BasicBlock *BB = *I; - TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases); - } + for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = + PostOrder.rbegin(), E = PostOrder.rend(); + I != E; ++I) + TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); return TopDownNestingDetected && BottomUpNestingDetected; } @@ -3139,7 +3195,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { UE = Alloca->use_end(); UI != UE; ) { CallInst *UserInst = cast<CallInst>(*UI++); if (!UserInst->use_empty()) - UserInst->replaceAllUsesWith(UserInst->getOperand(1)); + UserInst->replaceAllUsesWith(UserInst->getArgOperand(0)); UserInst->eraseFromParent(); } Alloca->eraseFromParent(); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index f6762ad..e4cb55c 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -156,6 +157,7 @@ namespace { /// class SCCPSolver : public InstVisitor<SCCPSolver> { const TargetData *TD; + const TargetLibraryInfo *TLI; SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable. DenseMap<Value*, LatticeVal> ValueState; // The state each value is in. @@ -206,7 +208,8 @@ class SCCPSolver : public InstVisitor<SCCPSolver> { typedef std::pair<BasicBlock*, BasicBlock*> Edge; DenseSet<Edge> KnownFeasibleEdges; public: - SCCPSolver(const TargetData *td) : TD(td) {} + SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli) + : TD(td), TLI(tli) {} /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. @@ -1125,7 +1128,7 @@ CallOverdefined: // If we can constant fold this, mark the result of the call as a // constant. - if (Constant *C = ConstantFoldCall(F, Operands)) + if (Constant *C = ConstantFoldCall(F, Operands, TLI)) return markConstant(I, C); } @@ -1517,6 +1520,9 @@ namespace { /// Sparse Conditional Constant Propagator. 
///
struct SCCP : public FunctionPass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetLibraryInfo>();
+    }
    static char ID; // Pass identification, replacement for typeid
    SCCP() : FunctionPass(ID) {
      initializeSCCPPass(*PassRegistry::getPassRegistry());
    }
@@ -1569,7 +1575,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
  DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
-  SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+  SCCPSolver Solver(TD, TLI);

  // Mark the first block of the function as being executable.
  Solver.MarkBlockExecutable(F.begin());
@@ -1641,6 +1649,9 @@ namespace {
  /// Constant Propagation.
  ///
  struct IPSCCP : public ModulePass {
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<TargetLibraryInfo>();
+    }
    static char ID;
    IPSCCP() : ModulePass(ID) {
      initializeIPSCCPPass(*PassRegistry::getPassRegistry());
    }
@@ -1650,7 +1661,11 @@ namespace {
} // end anonymous namespace

char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+                "Interprocedural Sparse Conditional Constant Propagation",
+                false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
                "Interprocedural Sparse Conditional Constant Propagation",
                false, false)
@@ -1689,7 +1704,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}

bool IPSCCP::runOnModule(Module &M) {
-  SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+  const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+  SCCPSolver Solver(TD, TLI);

  // AddressTakenFunctions - This set keeps track of the address-taken functions
  // that are in the input.  As IPSCCP runs through and simplifies code,
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 4b14efc..bc70c51 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+      if (!GEP->getPointerOperandType()->isPointerTy())
+        return false;
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // See if all uses can be converted.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6e169de..f3184ec 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -999,7 +999,7 @@ struct FFSOpt : public LibCallOptimization {
    Type *ArgType = Op->getType();
    Value *F = Intrinsic::getDeclaration(Callee->getParent(),
                                         Intrinsic::cttz, ArgType);
-    Value *V = B.CreateCall(F, Op, "cttz");
+    Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
    V = B.CreateIntCast(V, B.getInt32Ty(), false);

@@ -1293,7 +1293,8 @@ struct FWriteOpt : public LibCallOptimization {
      return ConstantInt::get(CI->getType(), 0);

    // If this is writing one byte, turn it into fputc.
-    if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
+    // This optimization is only valid if the return value is unused.
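    // Rationale, illustrated: fwrite(S,1,1,F) reports how many items were
    // written, so folding the call's result to the constant 1 silently
    // assumes the write succeeded. In C terms:
    //   size_t n = fwrite(s, 1, 1, f);  // n == 0 if the write fails
    //   fputc(s[0], f);                 // result (char or EOF) is discarded
    // A caller reading the result could observe the difference, hence the
    // use_empty() restriction added below.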
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index c83f56c..ef65c0a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" @@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst, if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. - if (!Inst->isSafeToSpeculativelyExecute()) { + if (!isSafeToSpeculativelyExecute(Inst)) { DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n"); return false; } diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 8e5a1eb..d831452 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, // Check to see if this value is already used in the memory instruction's // block. If so, it's already live into the block at the very least, so we // can reasonably fold it. - BasicBlock *MemBB = MemoryInst->getParent(); - for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end(); - UI != E; ++UI) - // We know that uses of arguments and instructions have to be instructions. - if (cast<Instruction>(*UI)->getParent() == MemBB) - return true; - - return false; + return Val->isUsedInBasicBlock(MemoryInst->getParent()); } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index a7f9efd..ef4a473 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -453,9 +453,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// of the edges being split is an exit of a loop with other exits). /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, - BasicBlock *const *Preds, - unsigned NumPreds, const char *Suffix, - Pass *P) { + ArrayRef<BasicBlock*> Preds, + const char *Suffix, Pass *P) { // Create new basic block, insert right before the original block. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, BB->getParent(), BB); @@ -464,7 +463,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, BranchInst *BI = BranchInst::Create(BB, NewBB); // Move the edges from Preds to point to NewBB instead of BB. - for (unsigned i = 0; i != NumPreds; ++i) { + for (unsigned i = 0, e = Preds.size(); i != e; ++i) { // This is slightly more strict than necessary; the minimum requirement // is that there be no more than one indirectbr branching to BB. And // all BlockAddress uses would need to be updated. @@ -477,7 +476,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // node becomes an incoming value for BB's phi node. However, if the Preds // list is empty, we need to insert dummy entries into the PHI nodes in BB to // account for the newly created predecessor. - if (NumPreds == 0) { + if (Preds.size() == 0) { // Insert dummy values as the incoming value. 
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I) cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); @@ -486,12 +485,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), - P, HasLoopExit); + UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. - UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI, - P, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit); return NewBB; } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c052910..f752d79 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // form, which we're in the process of restoring! if (!Preds.empty() && HasPredOutsideOfLoop) { BasicBlock *NewExitBB = - SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), - "split", P); + SplitBlockPredecessors(Exit, Preds, "split", P); if (P->mustPreserveAnalysisID(LCSSAID)) CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 6d5432d..d96f59c 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils Local.cpp LoopSimplify.cpp LoopUnroll.cpp + LoopUnrollRuntime.cpp LowerExpectIntrinsic.cpp LowerInvoke.cpp LowerSwitch.cpp @@ -28,11 +29,3 @@ add_llvm_library(LLVMTransformUtils Utils.cpp ValueMapper.cpp ) - -add_llvm_library_dependencies(LLVMTransformUtils - LLVMAnalysis - LLVMCore - LLVMSupport - LLVMTarget - LLVMipa - ) diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index dd4a659..f89e1b1 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -924,40 +924,44 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { return false; } - // Find the personality function used by the landing pads of the caller. If it - // exists, then check to see that it matches the personality function used in - // the callee. - for (Function::const_iterator - I = Caller->begin(), E = Caller->end(); I != E; ++I) + // Get the personality function from the callee if it contains a landing pad. + Value *CalleePersonality = 0; + for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); + I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); - // FIXME: This 'isa' here should become go away once the new EH system is - // in place. - if (!isa<LandingPadInst>(BB->getFirstNonPHI())) - continue; - const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI()); - const Value *CallerPersFn = LP->getPersonalityFn(); - - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. 
-  for (Function::const_iterator
-         I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
-    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
-      const BasicBlock *BB = II->getUnwindDest();
-      // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
-      // the new EH system is in place.
-      if (const LandingPadInst *LP =
-            dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
-        if (CallerPersFn != LP->getPersonalityFn())
-          return false;
-      break;
-    }
-
+      // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
+      // the new EH system is in place.
+      if (const LandingPadInst *LP =
+            dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
+        CalleePersonality = LP->getPersonalityFn();
      break;
    }

+  // Find the personality function used by the landing pads of the caller. If it
+  // exists, then check to see that it matches the personality function used in
+  // the callee.
+  if (CalleePersonality)
+    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+         I != E; ++I)
+      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+        const BasicBlock *BB = II->getUnwindDest();
+        // FIXME: This 'isa' here should go away once the new EH system
+        // is in place.
+        if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
+          continue;
+        const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
+
+        // If the personality functions match, then we can perform the
+        // inlining. Otherwise, we can't inline.
+        // TODO: This isn't 100% true. Some personality functions are proper
+        // supersets of others and can be used in place of the other.
+        if (LP->getPersonalityFn() != CalleePersonality)
+          return false;
+
+        break;
+      }
+
  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
// diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index dea7b02..88b2ffe 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -20,4 +20,3 @@ type = Library name = TransformUtils parent = Transforms required_libraries = Analysis Core IPA Support Target - diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 134ab71..4dd93cf 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -494,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { if (Succ->getSinglePredecessor()) return true; // Make a list of the predecessors of BB - typedef SmallPtrSet<BasicBlock*, 16> BlockSet; - BlockSet BBPreds(pred_begin(BB), pred_end(BB)); - - // Use that list to make another list of common predecessors of BB and Succ - BlockSet CommonPreds; - for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) { - BasicBlock *P = *PI; - if (BBPreds.count(P)) - CommonPreds.insert(P); - } + SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB)); - // Shortcut, if there are no common predecessors, merging is always safe - if (CommonPreds.empty()) - return true; - // Look at all the phi nodes in Succ, to see if they present a conflict when // merging these blocks for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) { @@ -520,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // merge the phi nodes and then the blocks can still be merged PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB)); if (BBPN && BBPN->getParent() == BB) { - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { - if (BBPN->getIncomingValueForBlock(*PI) - != PN->getIncomingValueForBlock(*PI)) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && + BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with " << BBPN->getName() << " with regard to common predecessor " - << (*PI)->getName() << "\n"); + << IBB->getName() << "\n"); return false; } } } else { Value* Val = PN->getIncomingValueForBlock(BB); - for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); - PI != PE; PI++) { + for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) { // See if the incoming value for the common predecessor is equal to the // one for BB, in which case this phi node will not prevent the merging // of the block. - if (Val != PN->getIncomingValueForBlock(*PI)) { + BasicBlock *IBB = PN->getIncomingBlock(PI); + if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) { DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " << Succ->getName() << " is conflicting with regard to common " - << "predecessor " << (*PI)->getName() << "\n"); + << "predecessor " << IBB->getName() << "\n"); return false; } } @@ -748,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, // If there is a large requested alignment and we can, bump up the alignment // of the global. if (GV->isDeclaration()) return Align; + // If the memory we set aside for the global may not be the memory used by + // the final program then it is impossible for us to reliably enforce the + // preferred alignment. 
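  // For instance (illustrative C), if two translation units both define
  //   __attribute__((weak)) char buf[64];
  // the linker keeps only one copy. Raising the alignment of this module's
  // copy proves nothing about the copy that survives linking, so the
  // preferred alignment cannot be enforced for weak globals.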
+ if (GV->isWeakForLinker()) return Align; if (GV->getAlignment() >= PrefAlign) return GV->getAlignment(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index cbd54a8..4376265 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -99,7 +99,8 @@ namespace { bool ProcessLoop(Loop *L, LPPassManager &LPM); BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); BasicBlock *InsertPreheaderForLoop(Loop *L); - Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); + Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader); BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); void PlaceSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl<BasicBlock*> &SplitPreds, @@ -240,7 +241,7 @@ ReprocessLoop: // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (SeparateNestedLoop(L, LPM)) { + if (SeparateNestedLoop(L, LPM, Preheader)) { ++NumNested; // This is a big restructuring change, reprocess the whole loop. Changed = true; @@ -265,7 +266,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast<PHINode>(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { } // Split out the loop pre-header. - BasicBlock *NewBB = - SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), - ".preheader", this); + BasicBlock *PreheaderBB; + if (!Header->isLandingPad()) { + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", + this); + } else { + SmallVector<BasicBlock*, 2> NewBBs; + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + ".split-lp", this, NewBBs); + PreheaderBB = NewBBs[0]; + } - NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); - DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName() - << "\n"); + PreheaderBB->getTerminator()->setDebugLoc( + Header->getFirstNonPHI()->getDebugLoc()); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. - PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); - return NewBB; + return PreheaderBB; } /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { this, NewBBs); NewExitBB = NewBBs[0]; } else { - NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], - LoopBlocks.size(), ".loopexit", - this); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this); } DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " @@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT, for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { // This is a degenerate PHI already, don't modify it! 
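    // SimplifyInstruction's new TargetLibraryInfo parameter sits between
    // TargetData and the DominatorTree; callers without a TLI pass a null,
    // keeping the old behavior:
    //   Value *V = SimplifyInstruction(PN, /*TD=*/0, /*TLI=*/0, DT);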
PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -529,7 +536,17 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// If we are able to separate out a loop, return the new outer loop that was /// created. /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, + BasicBlock *Preheader) { + // Don't try to separate loops without a preheader (this excludes + // loop headers which are targeted by an indirectbr). + if (!Preheader) + return 0; + + // The header is not a landing pad; preheader insertion should ensure this. + assert(!L->getHeader()->isLandingPad() && + "Can't insert backedge to landing pad"); + PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI); if (PN == 0) return 0; // No known way to partition. @@ -539,13 +556,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SmallVector<BasicBlock*, 8> OuterLoopPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) { - // We can't split indirectbr edges. - if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) - return 0; - + !L->contains(PN->getIncomingBlock(i))) OuterLoopPreds.push_back(PN->getIncomingBlock(i)); - } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); @@ -556,9 +568,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SE->forgetLoop(L); BasicBlock *Header = L->getHeader(); - BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], - OuterLoopPreds.size(), - ".outer", this); + BasicBlock *NewBB = + SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -640,6 +651,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { if (!Preheader) return 0; + // The header is not a landing pad; preheader insertion should ensure this. + assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // Figure out which basic blocks contain back-edges to the loop header. std::vector<BasicBlock*> BackedgeBlocks; for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 62e4fa2..b96f14b 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, /// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are /// available it must also preserve those analyses. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, - unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) { + bool AllowRuntime, unsigned TripMultiple, + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -165,12 +166,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, return false; } - // Notify ScalarEvolution that the loop will be substantially changed, - // if not outright eliminated. 
-  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
-  if (SE)
-    SE->forgetLoop(L);
-
   if (TripCount != 0)
     DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
   if (TripMultiple != 1)
@@ -188,6 +183,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   // Are we eliminating the loop control altogether?
   bool CompletelyUnroll = Count == TripCount;
 
+  // We assume a run-time trip count if the compiler cannot
+  // figure out the loop trip count and the unroll-runtime
+  // flag is specified.
+  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+    return false;
+
+  // Notify ScalarEvolution that the loop will be substantially changed,
+  // if not outright eliminated.
+  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+  if (SE)
+    SE->forgetLoop(L);
+
   // If we know the trip count, we know the multiple...
   unsigned BreakoutTrip = 0;
   if (TripCount != 0) {
@@ -209,6 +218,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
     } else if (TripMultiple != 1) {
       DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+    } else if (RuntimeTripCount) {
+      DEBUG(dbgs() << " with run-time trip count");
     }
     DEBUG(dbgs() << "!\n");
   }
@@ -332,6 +343,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
       BasicBlock *Dest = Headers[j];
       bool NeedConditional = true;
 
+      if (RuntimeTripCount && j != 0) {
+        NeedConditional = false;
+      }
+
       // For a complete unroll, make the last iteration end with a branch
       // to the exit block.
       if (CompletelyUnroll && j == 0) {
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 0000000..b351852
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,374 @@
+//===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts.  See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0.  When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations.  Other strategies
+// include generating a loop before or after the unrolled loop.
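To make the strategy concrete, here is a source-level sketch of the transformation for an unroll factor of 4. This is illustrative C++ only, not compiler output, and the function is hypothetical: the prolog runs the n % 4 'left over' iterations, and a guard then branches around the 4-way unrolled loop when fewer than 4 iterations remain.

    void scale(float *a, float s, unsigned n) {
      unsigned i = 0, xtraiter = n % 4;       // extraiters = tripcount % loopfactor
      if (xtraiter >= 3) { a[i] *= s; ++i; }  // the if-then-else chain that
      if (xtraiter >= 2) { a[i] *= s; ++i; }  // SimplifyCFG may turn into a switch
      if (xtraiter >= 1) { a[i] *= s; ++i; }
      if (n >= 4)                             // 'if tripcount < loopfactor jump End'
        for (; i < n; i += 4) {               // loop body duplicated 4 times
          a[i]   *= s;
          a[i+1] *= s;
          a[i+2] *= s;
          a[i+3] *= s;
        }
    }

Callers reach this code through the new AllowRuntime parameter of UnrollLoop above; a sketch of an updated call site follows, where the unroll factor, the statistic, and the surrounding pass state are placeholders, not code from this patch:

    // TripCount == 0 means the trip count is unknown at compile time; with
    // AllowRuntime set, UnrollLoop may emit the run-time prolog instead of
    // refusing to unroll.
    if (UnrollLoop(L, /*Count=*/4, /*TripCount=*/0, /*AllowRuntime=*/true,
                   /*TripMultiple=*/1, LI, &LPM))
      ++NumUnrolled;  // hypothetical statistic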
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+          "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at the prolog end block to combine values
+///   that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+///   for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+///   than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+                          BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+                          BasicBlock *OrigPH, BasicBlock *NewPH,
+                          ValueToValueMapTy &LVMap, Pass *P) {
+  BasicBlock *Latch = L->getLoopLatch();
+  assert(Latch != 0 && "Loop must have a latch");
+
+  // Create a PHI node for each outgoing value from the original loop
+  // (which means it is an outgoing value from the prolog code too).
+  // The new PHI node is inserted in the prolog end basic block.
+  // The new PHI name is added as an operand of a PHI node in either
+  // the loop header or the loop exit block.
+  for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+       SBI != SBE; ++SBI) {
+    for (BasicBlock::iterator BBI = (*SBI)->begin();
+         PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+      // Add a new PHI node to the prolog end block and add the
+      // appropriate incoming values.
+      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+                                       PrologEnd->getTerminator());
+      // Adding a value to the new PHI node from the original loop preheader.
+      // This is the value that skips all the prolog code.
+      if (L->contains(PN)) {
+        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+      } else {
+        NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+      }
+      Value *OrigVal = PN->getIncomingValueForBlock(Latch);
+      Value *V = OrigVal;
+      if (Instruction *I = dyn_cast<Instruction>(V)) {
+        if (L->contains(I)) {
+          V = LVMap[I];
+        }
+      }
+      // Adding a value to the new PHI node from the last prolog block
+      // that was created.
+      NewPN->addIncoming(V, LastPrologBB);
+
+      // Update the existing PHI node operand with the value from the
+      // new PHI node.  How this is done depends on whether the existing
+      // PHI node is in the original loop block or the exit block.
+      if (L->contains(PN)) {
+        PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+      } else {
+        PN->addIncoming(NewPN, PrologEnd);
+      }
+    }
+  }
+
+  // Create a branch around the original loop, which is taken if the
+  // trip count is less than the unroll factor.
+  Instruction *InsertPt = PrologEnd->getTerminator();
+  Instruction *BrLoopExit =
+    new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+                 ConstantInt::get(TripCount->getType(), Count));
+  BasicBlock *Exit = L->getUniqueExitBlock();
+  assert(Exit != 0 && "Loop must have a single exit block only");
+  // Split the exit to maintain loop canonicalization guarantees.
+  SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+  if (!Exit->isLandingPad()) {
+    SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+  } else {
+    SmallVector<BasicBlock*, 2> NewBBs;
+    SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+                                P, NewBBs);
+  }
+  // Add the branch to the exit block (around the unrolled loop).
+  BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+  InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used.  VMap is
+/// for the values in the current loop instance.  LVMap contains
+/// the values from the last loop instance.  We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+                            bool FirstCopy,
+                            BasicBlock *InsertTop,
+                            BasicBlock *InsertBot,
+                            std::vector<BasicBlock *> &NewBlocks,
+                            LoopBlocksDFS &LoopBlocks,
+                            ValueToValueMapTy &VMap,
+                            ValueToValueMapTy &LVMap,
+                            LoopInfo *LI) {
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *Latch = L->getLoopLatch();
+  Function *F = Header->getParent();
+  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+  // For each block in the original loop, create a new copy,
+  // and update the value map with the newly created values.
+  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+    NewBlocks.push_back(NewBB);
+
+    if (Loop *ParentLoop = L->getParentLoop())
+      ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+    VMap[*BB] = NewBB;
+    if (Header == *BB) {
+      // For the first block, add a CFG connection to this newly
+      // created block.
+      InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+      // Change the incoming values to the ones defined in the
+      // previously cloned loop.
+      for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+        PHINode *NewPHI = cast<PHINode>(VMap[I]);
+        if (FirstCopy) {
+          // We replace the first phi node with the value from the preheader.
+          VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+          NewBB->getInstList().erase(NewPHI);
+        } else {
+          // Update VMap with values from the previous block.
+          unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+          Value *InVal = NewPHI->getIncomingValue(idx);
+          if (Instruction *I = dyn_cast<Instruction>(InVal))
+            if (L->contains(I))
+              InVal = LVMap[InVal];
+          NewPHI->setIncomingValue(idx, InVal);
+          NewPHI->setIncomingBlock(idx, InsertTop);
+        }
+      }
+    }
+
+    if (Latch == *BB) {
+      VMap.erase((*BB)->getTerminator());
+      NewBB->getTerminator()->eraseFromParent();
+      BranchInst::Create(InsertBot, NewBB);
+    }
+  }
+  // LastValueMap is updated with the values for the current loop
+  // which are used the next time this function is called.
+  for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+       VI != VE; ++VI) {
+    LVMap[VI->first] = VI->second;
+  }
+}
+
+/// Insert code in the prolog code when unrolling a loop with a
+/// run-time trip-count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling.  (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We assume also that the loop unroll factor is a power-of-two.  So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc.  Note: LLVM converts the if-then sequence to a switch
+/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+///    extraiters = tripcount % loopfactor
+///    if (extraiters == 0) jump Loop:
+///    if (extraiters == loopfactor) jump L1
+///    if (extraiters == loopfactor-1) jump L2
+///    ...
+///    L1: LoopBody;
+///    L2: LoopBody;
+///    ...
+///    if tripcount < loopfactor jump End
+///    Loop:
+///    ...
+///    End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+                                   LPPassManager *LPM) {
+  // For now, only unroll loops that contain a single exit.
+  SmallVector<BasicBlock*, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  if (ExitingBlocks.size() > 1)
+    return false;
+
+  // Make sure the loop is in canonical form, and there is a single
+  // exit block only.
+  if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+    return false;
+
+  // Use ScalarEvolution to compute the trip count.  This allows more
+  // loops to be unrolled than relying on induction var simplification.
+  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+  if (SE == 0)
+    return false;
+
+  // Only unroll loops with a computable trip count, and the trip count needs
+  // to be an int value (allowing a pointer type is a TODO item).
+  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+    return false;
+
+  // Add 1 since the backedge count doesn't include the first loop iteration.
+  const SCEV *TripCountSC =
+    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+  if (isa<SCEVCouldNotCompute>(TripCountSC))
+    return false;
+
+  // We only handle cases when the unroll factor is a power of 2.
+  // Count is the loop unroll factor, the number of extra copies added + 1.
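A worked example of the remainder logic, with hypothetical values: for a run-time trip count of 11 and an unroll factor (Count) of 4, the prolog executes 11 % 4 = 3 iterations and the unrolled loop the remaining 8. Because Count is required to be a power of two by the check that follows, the URem can be lowered to a bitwise AND:

    #include <cassert>
    int main() {
      unsigned TripCount = 11, Count = 4;     // hypothetical values
      assert((Count & (Count - 1)) == 0);     // the power-of-2 check used below
      unsigned xtraiter = TripCount % Count;  // 3 'left over' iterations
      assert(xtraiter == (TripCount & (Count - 1)));
      // 3 iterations run in the prolog; the other 8 run as 2 passes of the
      // 4-way unrolled loop.
      assert(xtraiter + (TripCount / Count) * Count == TripCount);
      return 0;
    }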
+  if ((Count & (Count-1)) != 0)
+    return false;
+
+  // If this loop is nested, then the loop unroller changes the code in
+  // the parent loop, so the ScalarEvolution pass needs to be run again.
+  if (Loop *ParentLoop = L->getParentLoop())
+    SE->forgetLoop(ParentLoop);
+
+  BasicBlock *PH = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  BasicBlock *Latch = L->getLoopLatch();
+  // It helps to split the original preheader twice, once for the end of the
+  // prolog code and once for a new loop preheader.
+  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+  // Compute the number of extra iterations required, which is:
+  //   extra iterations = run-time trip count % loop unroll factor
+  SCEVExpander Expander(*SE, "loop-unroll");
+  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+                                            PreHeaderBR);
+  Type *CountTy = TripCount->getType();
+  BinaryOperator *ModVal =
+    BinaryOperator::CreateURem(TripCount,
+                               ConstantInt::get(CountTy, Count),
+                               "xtraiter");
+  ModVal->insertBefore(PreHeaderBR);
+
+  // If there are no extra iterations, jump straight to the unrolled loop.
+  Value *BranchVal = new ICmpInst(PreHeaderBR,
+                                  ICmpInst::ICMP_NE, ModVal,
+                                  ConstantInt::get(CountTy, 0), "lcmp");
+  // Branch to either the extra iterations or the unrolled loop.
+  // We will fix up the true branch label when adding loop body copies.
+  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+  assert(PreHeaderBR->isUnconditional() &&
+         PreHeaderBR->getSuccessor(0) == PEnd &&
+         "CFG edges in Preheader are not correct");
+  PreHeaderBR->eraseFromParent();
+
+  ValueToValueMapTy LVMap;
+  Function *F = Header->getParent();
+  // These variables are used to update the CFG links in each iteration.
+  BasicBlock *CompareBB = 0;
+  BasicBlock *LastLoopBB = PH;
+  // Get an ordered list of blocks in the loop to help with the ordering of the
+  // cloned blocks in the prolog code.
+  LoopBlocksDFS LoopBlocks(L);
+  LoopBlocks.perform(LI);
+
+  //
+  // For each extra loop iteration, create a copy of the loop's basic blocks
+  // and generate a condition that branches to the copy depending on the
+  // number of 'left over' iterations.
+  //
+  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+    std::vector<BasicBlock*> NewBlocks;
+    ValueToValueMapTy VMap;
+
+    // Clone all the basic blocks in the loop, but not the loop structure.
+    // This function adds the appropriate CFG connections.
+    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+                    LoopBlocks, VMap, LVMap, LI);
+    LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+    // Insert the cloned blocks into the function just before the original loop.
+    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+                                  NewBlocks[0], F->end());
+
+    // Generate the code for the comparison which determines if the loop
+    // prolog code needs to be executed.
+    if (leftOverIters == Count-1) {
+      // There is no compare block for the fall-through case of the last
+      // left-over iteration.
+      CompareBB = NewBlocks[0];
+    } else {
+      // Create a new block for the comparison.
+      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+                                             F, CompareBB);
+      if (Loop *ParentLoop = L->getParentLoop()) {
+        // Add the new block to the parent loop, if needed.
+        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+      }
+
+      // The comparison with the extra-iteration value, and the branch.
+      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+                                      ConstantInt::get(CountTy, leftOverIters),
+                                      "un.tmp");
+      // Branch to either the extra iterations or the unrolled loop.
+      BranchInst::Create(NewBlocks[0], CompareBB,
+                         BranchVal, NewBB);
+      CompareBB = NewBB;
+      PH->getTerminator()->setSuccessor(0, NewBB);
+      VMap[NewPH] = CompareBB;
+    }
+
+    // Rewrite the cloned instructions' operands to use the values
+    // created when the blocks were cloned.
+    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+           E = NewBlocks[i]->end(); I != E; ++I) {
+        RemapInstruction(I, VMap,
+                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+      }
+    }
+  }
+
+  // Connect the prolog code to the original loop and update the
+  // PHI functions.
+  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+                LPM->getAsPass());
+  NumRuntimeUnrolled++;
+  return true;
+}
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 5e294a3..8491c55 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -19,7 +19,8 @@
 using namespace llvm;
 
-void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+static void appendToGlobalArray(const char *Array,
+                                Module &M, Function *F, int Priority) {
   IRBuilder<> IRB(M.getContext());
   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
   StructType *Ty = StructType::get(
@@ -31,7 +32,7 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
   // Get the current set of static global constructors and add the new ctor
   // to the list.
   SmallVector<Constant *, 16> CurrentCtors;
-  if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) {
+  if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
     if (Constant *Init = GVCtor->getInitializer()) {
       unsigned n = Init->getNumOperands();
       CurrentCtors.reserve(n + 1);
@@ -51,6 +52,13 @@ void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
   // Create the new global variable and replace all uses of
   // the old global variable with the new one.
   (void)new GlobalVariable(M, NewInit->getType(), false,
-                           GlobalValue::AppendingLinkage, NewInit,
-                           "llvm.global_ctors");
+                           GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+  appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+  appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e942..e8f4285 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -590,7 +590,7 @@ void PromoteMem2Reg::run() {
     PHINode *PN = I->second;
 
     // If this PHI node merges one value and/or undefs, get the value.
-    if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+    if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
       if (AST && PN->getType()->isPointerTy())
         AST->deleteValue(PN);
       PN->replaceAllUsesWith(V);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4..bf2cb49 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -257,7 +257,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
   // Okay, it looks like the instruction IS in the "condition".  Check to
   // see if it's a cheap instruction to unconditionally compute, and if it
   // only uses stuff defined outside of the condition.  If so, hoist it out.
-  if (!I->isSafeToSpeculativelyExecute())
+  if (!isSafeToSpeculativelyExecute(I))
     return false;
 
   unsigned Cost = 0;
@@ -1487,7 +1487,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   Instruction *BonusInst = 0;
   if (&*FrontIt != Cond &&
       FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
-      FrontIt->isSafeToSpeculativelyExecute()) {
+      isSafeToSpeculativelyExecute(FrontIt)) {
     BonusInst = &*FrontIt;
     ++FrontIt;
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f9..81eb9e0 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
+      AU.addRequired<TargetLibraryInfo>();
     }
 
     /// runOnFunction - Remove instructions that simplify.
     bool runOnFunction(Function &F) {
       const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
       const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+      const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
       SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
       bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
           continue;
         // Don't waste time simplifying unused instructions.
         if (!I->use_empty())
-          if (Value *V = SimplifyInstruction(I, TD, DT)) {
+          if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
             // Mark all uses for resimplification next time round the loop.
             for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
                  UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
 }
 
 char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
-                false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+                      "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+                    "Remove redundant instructions", false, false)
 char &llvm::InstructionSimplifierID = InstSimplifier::ID;
 
 // Public interface to the simplify instructions pass.
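The SimplifyInstructions change follows the same pattern this patch applies elsewhere: declare a hard dependency on TargetLibraryInfo in getAnalysisUsage, register it with the INITIALIZE_PASS_* macros, and thread the analysis into the simplification entry points. A minimal sketch of the pattern for a hypothetical pass, where the pass name and the exact header set are assumptions:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Support/InstIterator.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    // Declared up front so the INITIALIZE_PASS_* macros can define it.
    namespace llvm { void initializeTLIExamplePass(PassRegistry&); }

    namespace {
      struct TLIExample : public FunctionPass {
        static char ID;
        TLIExample() : FunctionPass(ID) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.setPreservesCFG();
          AU.addRequired<TargetLibraryInfo>();  // hard requirement, never null
        }

        virtual bool runOnFunction(Function &F) {
          const TargetData *TD = getAnalysisIfAvailable<TargetData>(); // may be 0
          const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
          bool Changed = false;
          for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
            if (!I->use_empty())
              if (Value *V = SimplifyInstruction(&*I, TD, TLI)) {
                I->replaceAllUsesWith(V);  // dead instructions are left for DCE
                Changed = true;
              }
          return Changed;
        }
      };
    }

    char TLIExample::ID = 0;
    INITIALIZE_PASS_BEGIN(TLIExample, "tli-example",
                          "TargetLibraryInfo dependency example", false, false)
    INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
    INITIALIZE_PASS_END(TLIExample, "tli-example",
                        "TargetLibraryInfo dependency example", false, false)

Because the dependency is declared with addRequired rather than fetched with getAnalysisIfAvailable, the pass manager schedules TargetLibraryInfo automatically and the pointer returned by getAnalysis is always valid, which is why the call sites above can pass TLI unconditionally.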