diff options
Diffstat (limited to 'lib/Transforms')
40 files changed, 1361 insertions, 1057 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 54a7f67..fa007cf 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + std::vector<Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; @@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 179b150..3de7bfc 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,7 +2,6 @@ add_llvm_library(LLVMipo ArgumentPromotion.cpp ConstantMerge.cpp DeadArgumentElimination.cpp - DeadTypeElimination.cpp ExtractGV.cpp FunctionAttrs.cpp GlobalDCE.cpp diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d4eaf0c..1517765 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // the old function, but doesn't have isVarArg set. const FunctionType *FTy = Fn.getFunctionType(); - std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); + std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); @@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(PAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(PAL); if (cast<CallInst>(Call)->isTailCall()) @@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Start by computing a new prototype for the function, which is the same as // the old function, but has fewer arguments and a different return type. const FunctionType *FTy = F->getFunctionType(); - std::vector<const Type*> Params; + std::vector<Type*> Params; // Set up to build a new list of parameter attributes. SmallVector<AttributeWithIndex, 8> AttributesVec; @@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value. - const Type *RetTy = FTy->getReturnType(); + Type *RetTy = FTy->getReturnType(); const Type *NRetTy = NULL; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index SmallVector<int, 5> NewRetIdxs(RetCount, -1); - std::vector<const Type*> RetTypes; + std::vector<Type*> RetTypes; if (RetTy->isVoidTy()) { NRetTy = RetTy; } else { @@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); + Args, "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); + New = CallInst::Create(NF, Args, "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(NewCallPAL); if (cast<CallInst>(Call)->isTailCall()) diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp deleted file mode 100644 index d3d4963..0000000 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ /dev/null @@ -1,112 +0,0 @@ -//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass is used to cleanup the output of GCC. It eliminate names for types -// that are unused in the entire translation unit, using the FindUsedTypes pass. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "deadtypeelim" -#include "llvm/Transforms/IPO.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Module.h" -#include "llvm/TypeSymbolTable.h" -#include "llvm/DerivedTypes.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumKilled, "Number of unused typenames removed from symtab"); - -namespace { - struct DTE : public ModulePass { - static char ID; // Pass identification, replacement for typeid - DTE() : ModulePass(ID) { - initializeDTEPass(*PassRegistry::getPassRegistry()); - } - - // doPassInitialization - For this pass, it removes global symbol table - // entries for primitive types. These are never used for linking in GCC and - // they make the output uglier to look at, so we nuke them. - // - // Also, initialize instance variables. - // - bool runOnModule(Module &M); - - // getAnalysisUsage - This function needs FindUsedTypes to do its job... - // - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<FindUsedTypes>(); - } - }; -} - -char DTE::ID = 0; -INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination", - false, false) -INITIALIZE_PASS_DEPENDENCY(FindUsedTypes) -INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false) - -ModulePass *llvm::createDeadTypeEliminationPass() { - return new DTE(); -} - - -// ShouldNukeSymtabEntry - Return true if this module level symbol table entry -// should be eliminated. -// -static inline bool ShouldNukeSymtabEntry(const Type *Ty){ - // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return true; - - // Nuke all pointers to primitive types as well... - if (const PointerType *PT = dyn_cast<PointerType>(Ty)) - if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isIntegerTy()) - return true; - - return false; -} - -// run - For this pass, it removes global symbol table entries for primitive -// types. These are never used for linking in GCC and they make the output -// uglier to look at, so we nuke them. Also eliminate types that are never used -// in the entire program as indicated by FindUsedTypes. -// -bool DTE::runOnModule(Module &M) { - bool Changed = false; - - TypeSymbolTable &ST = M.getTypeSymbolTable(); - const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes(); - std::set<const Type*> UsedTypes(T.begin(), T.end()); - - // Check the symbol table for superfluous type entries... - // - // Grab the 'type' plane of the module symbol... - TypeSymbolTable::iterator TI = ST.begin(); - TypeSymbolTable::iterator TE = ST.end(); - while ( TI != TE ) { - // If this entry should be unconditionally removed, or if we detect that - // the type is not used, remove it. - const Type *RHS = TI->second; - if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) { - ST.remove(TI++); - ++NumKilled; - Changed = true; - } else { - ++TI; - // We only need to leave one name for each type. - UsedTypes.erase(RHS); - } - } - - return Changed; -} - -// vim: sw=2 diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 21dcb51..31ce95f 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeConstantMergePass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); - initializeDTEPass(Registry); initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); @@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createDeadArgEliminationPass()); } -void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createDeadTypeEliminationPass()); -} - void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createFunctionAttrsPass()); } diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 52ecf17..659476b 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) CastInst* CI = new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); Value *Args[] = { CI, Inst->getArgOperand(1) }; - CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); + CallInst::Create(ThrowLongJmp, Args, "", Inst); SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; @@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) GetSetJmpMap(Func), BufPtr, ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) }; - CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst); + CallInst::Create(AddSJToMap, Args, "", Inst); // We are guaranteed that there are no values live across basic blocks // (because we are "not in SSA form" yet), but there can still be values live @@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI) std::vector<Value*> Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], - Params.begin(), Params.end(), CI.getName(), Term); + Params, CI.getName(), Term); II->setCallingConv(CI.getCallingConv()); II->setAttributes(CI.getAttributes()); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index f741443..7796d05 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::OpaqueTyID: case Type::VectorTyID: // Ty1 == Ty2 would have returned true earlier. return false; @@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { ++i; } - CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end()); + CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); if (NewG->getReturnType()->isVoidTy()) { diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 2f3baeb..b7e63dc 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) { if (II->doesNotThrow()) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), - Args.begin(), Args.end(), "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index a690765..0fbaff1 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -28,8 +28,8 @@ #include "llvm/Pass.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ValueSymbolTable.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; @@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) { assert(C->use_empty() && "Constant is not dead!"); SmallPtrSet<Constant*, 4> Operands; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - if (isa<DerivedType>(C->getOperand(i)->getType()) && - OnlyUsedBy(C->getOperand(i), C)) + if (OnlyUsedBy(C->getOperand(i), C)) Operands.insert(cast<Constant>(C->getOperand(i))); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { if (!GV->hasLocalLinkage()) return; // Don't delete non static globals. @@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { } } -// Strip the symbol table of its names. -static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) { - for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) { - if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg")) - ++TI; - else - ST.remove(TI++); +// Strip any named types of their names. +static void StripTypeNames(Module &M, bool PreserveDbgInfo) { + std::vector<StructType*> StructTypes; + M.findUsedStructTypes(StructTypes); + + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isAnonymous() || STy->getName().empty()) continue; + + if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg")) + continue; + + STy->setName(""); } } @@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { } // Remove all names from types. - StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo); + StripTypeNames(M, PreserveDbgInfo); return true; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a08446e..64ea36f 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask, /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. /// If so, insert the new bswap intrinsic and return it. Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { - const IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); + IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) @@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) return 0; - const Type *Tys[] = { ITy }; Module *M = I.getParent()->getParent()->getParent(); - Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 27e15c3..537f2b3 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getArgOperand(0)->getType(), - CI.getArgOperand(1)->getType(), - CI.getArgOperand(2)->getType() }; - CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys)); Changed = true; } } @@ -1118,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Instruction *NC; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { NC = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args.begin(), Args.end()); + II->getUnwindDest(), Args); NC->takeName(II); cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv()); cast<InvokeInst>(NC)->setAttributes(NewCallerPAL); } else { CallInst *CI = cast<CallInst>(Caller); - NC = Builder->CreateCall(Callee, Args.begin(), Args.end()); + NC = Builder->CreateCall(Callee, Args); NC->takeName(CI); if (CI->isTailCall()) cast<CallInst>(NC)->setTailCall(); @@ -1187,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { const AttrListPtr &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - const Type *NestTy = 0; + Type *NestTy = 0; Attributes NestAttr = Attribute::None; // Look for a parameter marked with the 'nest' attribute. @@ -1249,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { // Handle this by synthesizing a new function type, equal to FTy // with the chain parameter inserted. - std::vector<const Type*> NewTypes; + std::vector<Type*> NewTypes; NewTypes.reserve(FTy->getNumParams()+1); // Insert the chain's type into the list of parameter types, which may @@ -1289,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) { if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { NewCaller = InvokeInst::Create(NewCallee, II->getNormalDest(), II->getUnwindDest(), - NewArgs.begin(), NewArgs.end()); + NewArgs); cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv()); cast<InvokeInst>(NewCaller)->setAttributes(NewPAL); } else { - NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end()); + NewCaller = CallInst::Create(NewCallee, NewArgs); if (cast<CallInst>(Caller)->isTailCall()) cast<CallInst>(NewCaller)->setTailCall(); cast<CallInst>(NewCaller)-> diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 601d9b4..82c734e 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, } if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) { + // Cannot look past anything that might overflow. + OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val); + if (OBI && !OBI->hasNoUnsignedWrap()) { + Scale = 1; + Offset = 0; + return Val; + } + if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) { if (I->getOpcode() == Instruction::Shl) { // This is a value scaled by '1 << the shift amt'. @@ -71,11 +79,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // This requires TargetData to get the alloca alignment and size information. if (!TD) return 0; - // Insist that the amount-to-allocate not overflow. - OverflowingBinaryOperator *OBI = - dyn_cast<OverflowingBinaryOperator>(AI.getOperand(0)); - if (OBI && !(OBI->hasNoSignedWrap() || OBI->hasNoUnsignedWrap())) return 0; - const PointerType *PTy = cast<PointerType>(CI.getType()); BuilderTy AllocaBuilder(*Builder); @@ -1213,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); if (Call && Call->getCalledFunction() && Call->getCalledFunction()->getName() == "sqrt" && - Call->getNumArgOperands() == 1) { + Call->getNumArgOperands() == 1 && + Call->hasOneUse()) { CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); if (Arg && Arg->getOpcode() == Instruction::FPExt && CI.getType()->isFloatTy() && diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 42db444..c78760b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) { static bool HasAddOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().sgt(In1->getValue()); - else - return Result->getValue().slt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ult(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().sgt(In1->getValue()); + return Result->getValue().slt(In1->getValue()); } /// AddWithOverflow - Compute Result = In1+In2, returning true if the result @@ -77,13 +76,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1, static bool HasSubOverflow(ConstantInt *Result, ConstantInt *In1, ConstantInt *In2, bool IsSigned) { - if (IsSigned) - if (In2->getValue().isNegative()) - return Result->getValue().slt(In1->getValue()); - else - return Result->getValue().sgt(In1->getValue()); - else + if (!IsSigned) return Result->getValue().ugt(In1->getValue()); + + if (In2->isNegative()) + return Result->getValue().slt(In1->getValue()); + + return Result->getValue().sgt(In1->getValue()); } /// SubWithOverflow - Compute Result = In1-In2, returning true if the result @@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == high-bit-mask - 1 TrueIfSigned = true; - return RHS->getValue() == - APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits()); + return RHS->isMaxValue(true); case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; @@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) - Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(), - LaterIndices.size()); + Elt = ConstantExpr::getExtractValue(Elt, LaterIndices); // If the element is masked, handle it. if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst); @@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; } } - } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0. + } else if (DivRHS->isNegative()) { // Divisor is < 0. if (DivI->isExact()) RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize)); if (CmpRHSV == 0) { // (X / neg) op 0 @@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // If the sign bit of the XorCST is not set, there is no change to // the operation, just stop using the Xor. - if (!XorCST->getValue().isNegative()) { + if (!XorCST->isNegative()) { ICI.setOperand(0, CompareVal); Worklist.Add(LHSI); return &ICI; @@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) - if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) { + if (!ICI.isEquality() && XorCST->isMaxValue(true)) { const APInt &NotSignBit = XorCST->getValue(); ICmpInst::Predicate Pred = ICI.isSigned() ? ICI.getUnsignedPredicate() @@ -1088,7 +1085,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // Extending a relational comparison when we're checking the sign // bit would not work. if (ICI.isEquality() || - (AndCST->getValue().isNonNegative() && RHSV.isNonNegative())) { + (!AndCST->isNegative() && RHSV.isNonNegative())) { Value *NewAnd = Builder->CreateAnd(Cast->getOperand(0), ConstantExpr::getZExt(AndCST, Cast->getSrcTy())); @@ -1454,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, LHSI, Constant::getNullValue(RHS->getType())); - + + // Don't perform the following transforms if the AND has multiple uses + if (!BO->hasOneUse()) + break; + // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 if (BOC->getValue().isSignBit()) { Value *X = BO->getOperand(0); @@ -1679,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // result and the overflow bit. Module *M = I.getParent()->getParent()->getParent(); - const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); + Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth); Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, - &NewType, 1); + NewType); InstCombiner::BuilderTy *Builder = IC.Builder; @@ -1721,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, Builder->SetInsertPoint(OrigAdd); Module *M = I.getParent()->getParent()->getParent(); - const Type *Ty = LHS->getType(); - Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1); + Type *Ty = LHS->getType(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd"); Value *Add = Builder->CreateExtractValue(Call, 0); @@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO1->getOperand(0)); } - if (CI->getValue().isMaxSignedValue()) { + if (CI->isMaxValue(true)) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2d29403..630a6fe 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { bool hasNegative = false; for (unsigned i = 0; !hasNegative && i != VWidth; ++i) if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) hasNegative = true; if (hasNegative) { std::vector<Constant *> Elts(VWidth); for (unsigned i = 0; i != VWidth; ++i) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) { - if (RHS->getValue().isNegative()) + if (RHS->isNegative()) Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS)); else Elts[i] = RHS; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 92c10f5..ab98ef9 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // getelementptr instructions into a single instruction. // if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) { + + // If this GEP has only 0 indices, it is the same pointer as + // Src. If Src is not a trivial GEP too, don't combine + // the indices. + if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() && + !Src->hasOneUse()) + return 0; + // Note that if our source is a gep chain itself that we wait for that // chain to be resolved before we perform this transformation. This // avoids us creating a TON of code in some cases. @@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (EV.getNumIndices() > 1) // Extract the remaining indices out of the constant indexed by the // first index - return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end()); + return ExtractValueInst::Create(V, EV.getIndices().slice(1)); else return ReplaceInstUsesWith(EV, V); } @@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E = extractvalue { i32, { i32 } } %A, 0 return ExtractValueInst::Create(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); } if (exti == exte && insi == inse) // Both iterators are at the end: Index lists are identical. Replace @@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.idx_begin(), EV.idx_end()); + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), - insi, inse); + ArrayRef<unsigned>(insi, inse)); } if (insi == inse) // The insert list is a prefix of the extract list @@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // with // %E extractvalue { i32 } { i32 42 }, 0 return ExtractValueInst::Create(IV->getInsertedValueOperand(), - exti, exte); + ArrayRef<unsigned>(exti, exte)); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { // We're extracting from an intrinsic, see if we're the only user, which diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 07d69e8..3f2c412 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -572,14 +572,13 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - const Type *Args[] = { Type::getInt8PtrTy(*Ctx) }; const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + Type::getInt8PtrTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { - const Type *Args[] = { + Type *Args[] = { Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row }; @@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - const Type *Args[2] = { + Type *Args[2] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name }; @@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() { } Constant *GCOVProfiler::getEmitArcsFunc() { - const Type *Args[] = { + Type *Args[] = { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 1e5e3f6..7541663 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue, CallInst::Create( increment ? llvmIncrementHashFunction : llvmDecrementHashFunction, - args.begin(), args.end(), "", insertPoint); + args, "", insertPoint); } } diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 4224ee3..445a5b6 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, } Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements); - CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(), - "newargc", InsertPos); + CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos); // If argc or argv are not available in main, just pass null values in. Function::arg_iterator AI; @@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those // types. - const Type *GlobalDtorElems[2] = { + Type *GlobalDtorElems[2] = { Type::getInt32Ty(Mod->getContext()), FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() }; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 759e681..87b7317 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -91,6 +91,7 @@ namespace { uint32_t nextValueNumber; Expression create_expression(Instruction* I); + Expression create_extractvalue_expression(ExtractValueInst* EI); uint32_t lookup_or_add_call(CallInst* C); public: ValueTable() : nextValueNumber(1) { } @@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> { // ValueTable Internal Functions //===----------------------------------------------------------------------===// - Expression ValueTable::create_expression(Instruction *I) { Expression e; e.type = I->getType(); @@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) { OI != OE; ++OI) e.varargs.push_back(lookup_or_add(*OI)); - if (CmpInst *C = dyn_cast<CmpInst>(I)) + if (CmpInst *C = dyn_cast<CmpInst>(I)) { e.opcode = (C->getOpcode() << 8) | C->getPredicate(); - else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) { - for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); - II != IE; ++II) - e.varargs.push_back(*II); } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) { return e; } +Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { + assert(EI != 0 && "Not an ExtractValueInst?"); + Expression e; + e.type = EI->getType(); + e.opcode = 0; + + IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand()); + if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { + // EI might be an extract from one of our recognised intrinsics. If it + // is we'll synthesize a semantically equivalent expression instead on + // an extract value expression. + switch (I->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + e.opcode = Instruction::Add; + break; + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + e.opcode = Instruction::Sub; + break; + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + e.opcode = Instruction::Mul; + break; + default: + break; + } + + if (e.opcode != 0) { + // Intrinsic recognized. Grab its args to finish building the expression. + assert(I->getNumArgOperands() == 2 && + "Expect two args for recognised intrinsics."); + e.varargs.push_back(lookup_or_add(I->getArgOperand(0))); + e.varargs.push_back(lookup_or_add(I->getArgOperand(1))); + return e; + } + } + + // Not a recognised intrinsic. Fall back to producing an extract value + // expression. + e.opcode = EI->getOpcode(); + for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end(); + OI != OE; ++OI) + e.varargs.push_back(lookup_or_add(*OI)); + + for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end(); + II != IE; ++II) + e.varargs.push_back(*II); + + return e; +} + //===----------------------------------------------------------------------===// // ValueTable External Functions //===----------------------------------------------------------------------===// @@ -336,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) { case Instruction::ExtractElement: case Instruction::InsertElement: case Instruction::ShuffleVector: - case Instruction::ExtractValue: case Instruction::InsertValue: case Instruction::GetElementPtr: exp = create_expression(I); break; + case Instruction::ExtractValue: + exp = create_extractvalue_expression(cast<ExtractValueInst>(I)); + break; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b7c97e5..dee3d38 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -58,6 +58,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -72,6 +73,7 @@ STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); +STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); static cl::opt<bool> DisableIVRewrite( "disable-iv-rewrite", cl::Hidden, @@ -114,8 +116,17 @@ namespace { } private: + virtual void releaseMemory() { + DeadInsts.clear(); + } + bool isValidRewrite(Value *FromVal, Value *ToVal); + void HandleFloatingPointIV(Loop *L, PHINode *PH); + void RewriteNonIntegerIVs(Loop *L); + + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); + void SimplifyIVUsers(SCEVExpander &Rewriter); void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter); @@ -124,20 +135,16 @@ namespace { void EliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, bool IsSigned); - bool isSimpleIVUser(Instruction *I, const Loop *L); - void RewriteNonIntegerIVs(Loop *L); + + void SimplifyCongruentIVs(Loop *L); + + void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); - void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - - void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); - void SinkUnusedInvariants(Loop *L); - - void HandleFloatingPointIV(Loop *L, PHINode *PH); }; } @@ -204,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { return true; } -/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken -/// count expression can be safely and cheaply expanded into an instruction -/// sequence that can be used by LinearFunctionTestReplace. -static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || - BackedgeTakenCount->isZero()) - return false; +//===----------------------------------------------------------------------===// +// RewriteNonIntegerIVs and helpers. Prefer integer IVs. +//===----------------------------------------------------------------------===// - if (!L->getExitingBlock()) +/// ConvertToSInt - Convert APF to an integer, if possible. +static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { + bool isExact = false; + if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) return false; - - // Can't rewrite non-branch yet. - BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) + // See if we can convert this to an int64_t + uint64_t UIntVal; + if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, + &isExact) != APFloat::opOK || !isExact) return false; - - // Special case: If the backedge-taken count is a UDiv, it's very likely a - // UDiv that ScalarEvolution produced in order to compute a precise - // expression, rather than a UDiv from the user's code. If we can't find a - // UDiv in the code with some simple searching, assume the former and forego - // rewriting the loop. - if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { - ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!OrigCond) return false; - const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); - R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); - if (R != BackedgeTakenCount) { - const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); - L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); - if (L != BackedgeTakenCount) - return false; - } - } + IntVal = UIntVal; return true; } -/// getBackedgeIVType - Get the widest type used by the loop test after peeking -/// through Truncs. +/// HandleFloatingPointIV - If the loop has floating induction variable +/// then insert corresponding integer induction variable if possible. +/// For example, +/// for(double i = 0; i < 10000; ++i) +/// bar(i) +/// is converted into +/// for(int i = 0; i < 10000; ++i) +/// bar((double)i); /// -/// TODO: Unnecessary once LinearFunctionTestReplace is removed. -static const Type *getBackedgeIVType(Loop *L) { - if (!L->getExitingBlock()) - return 0; +void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { + unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); + unsigned BackEdge = IncomingEdge^1; - // Can't rewrite non-branch yet. - BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) - return 0; + // Check incoming value. + ConstantFP *InitValueVal = + dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); - ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!Cond) - return 0; + int64_t InitValue; + if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) + return; - const Type *Ty = 0; - for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); - OI != OE; ++OI) { - assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); - TruncInst *Trunc = dyn_cast<TruncInst>(*OI); - if (!Trunc) - continue; + // Check IV increment. Reject this PN if increment operation is not + // an add or increment value can not be represented by an integer. + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); + if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - return Trunc->getSrcTy(); + // If this is not an add of the PHI with a constantfp, or if the constant fp + // is not an integer, bail out. + ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); + int64_t IncValue; + if (IncValueVal == 0 || Incr->getOperand(0) != PN || + !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) + return; + + // Check Incr uses. One user is PN and the other user is an exit condition + // used by the conditional terminator. + Value::use_iterator IncrUse = Incr->use_begin(); + Instruction *U1 = cast<Instruction>(*IncrUse++); + if (IncrUse == Incr->use_end()) return; + Instruction *U2 = cast<Instruction>(*IncrUse++); + if (IncrUse != Incr->use_end()) return; + + // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't + // only used by a branch, we can't transform it. + FCmpInst *Compare = dyn_cast<FCmpInst>(U1); + if (!Compare) + Compare = dyn_cast<FCmpInst>(U2); + if (Compare == 0 || !Compare->hasOneUse() || + !isa<BranchInst>(Compare->use_back())) + return; + + BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); + + // We need to verify that the branch actually controls the iteration count + // of the loop. If not, the new IV can overflow and no one will notice. + // The branch block must be in the loop and one of the successors must be out + // of the loop. + assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); + if (!L->contains(TheBr->getParent()) || + (L->contains(TheBr->getSuccessor(0)) && + L->contains(TheBr->getSuccessor(1)))) + return; + + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't + // transform it. + ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); + int64_t ExitValue; + if (ExitValueVal == 0 || + !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) + return; + + // Find new predicate for integer comparison. + CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; + switch (Compare->getPredicate()) { + default: return; // Unknown comparison. + case CmpInst::FCMP_OEQ: + case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; + case CmpInst::FCMP_OGE: + case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - return Ty; -} -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. -ICmpInst *IndVarSimplify:: -LinearFunctionTestReplace(Loop *L, - const SCEV *BackedgeTakenCount, - PHINode *IndVar, - SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); - BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + // We convert the floating point induction variable to a signed i32 value if + // we can. This is only safe if the comparison will not overflow in a way + // that won't be trapped by the integer equivalent operations. Check for this + // now. + // TODO: We could use i64 if it is native and the range requires it. - // If the exiting block is not the same as the backedge block, we must compare - // against the preincremented value, otherwise we prefer to compare against - // the post-incremented value. - Value *CmpIndVar; - const SCEV *RHS = BackedgeTakenCount; - if (L->getExitingBlock() == L->getLoopLatch()) { - // Add one to the "backedge-taken" count to get the trip count. - // If this addition may overflow, we have to be more pessimistic and - // cast the induction variable before doing the add. - const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); - const SCEV *N = - SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); - if ((isa<SCEVConstant>(N) && !N->isZero()) || - SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { - // No overflow. Cast the sum. - RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); - } else { - // Potential overflow. Cast before doing the add. - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - RHS = SE->getAddExpr(RHS, - SE->getConstant(IndVar->getType(), 1)); + // The start/stride/exit values must all fit in signed i32. + if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) + return; + + // If not actually striding (add x, 0.0), avoid touching the code. + if (IncValue == 0) + return; + + // Positive and negative strides have different safety conditions. + if (IncValue > 0) { + // If we have a positive stride, we require the init to be less than the + // exit value and an equality or less than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) + return; + + uint32_t Range = uint32_t(ExitValue-InitValue); + if (NewPred == CmpInst::ICMP_SLE) { + // Normalize SLE -> SLT, check for infinite loop. + if (++Range == 0) return; // Range overflows. } - // The BackedgeTaken expression contains the number of times that the - // backedge branches to the loop header. This is one less than the - // number of times the loop executes, so use the incremented indvar. - CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + unsigned Leftover = Range % uint32_t(IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) + return; + } else { - // We have to use the preincremented value... - RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, - IndVar->getType()); - CmpIndVar = IndVar; + // If we have a negative stride, we require the init to be greater than the + // exit value and an equality or greater than comparison. + if (InitValue >= ExitValue || + NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) + return; + + uint32_t Range = uint32_t(InitValue-ExitValue); + if (NewPred == CmpInst::ICMP_SGE) { + // Normalize SGE -> SGT, check for infinite loop. + if (++Range == 0) return; // Range overflows. + } + + unsigned Leftover = Range % uint32_t(-IncValue); + + // If this is an equality comparison, we require that the strided value + // exactly land on the exit value, otherwise the IV condition will wrap + // around and do things the fp IV wouldn't. + if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && + Leftover != 0) + return; + + // If the stride would wrap around the i32 before exiting, we can't + // transform the IV. + if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) + return; } - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(RHS, L) && - "Computed iteration count is not loop invariant!"); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - // Insert a new icmp_ne or icmp_eq instruction before the branch. - ICmpInst::Predicate Opcode; - if (L->contains(BI->getSuccessor(0))) - Opcode = ICmpInst::ICMP_NE; - else - Opcode = ICmpInst::ICMP_EQ; + // Insert new integer induction variable. + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); + NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), + PN->getIncomingBlock(IncomingEdge)); - DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" - << " LHS:" << *CmpIndVar << '\n' - << " op:\t" - << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" - << " RHS:\t" << *RHS << "\n"); + Value *NewAdd = + BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), + Incr->getName()+".int", Incr); + NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, + ConstantInt::get(Int32Ty, ExitValue), + Compare->getName()); - Value *OrigCond = BI->getCondition(); - // It's tempting to use replaceAllUsesWith here to fully replace the old - // comparison, but that's not immediately safe, since users of the old - // comparison may not be dominated by the new comparison. Instead, just - // update the branch to use the new comparison; in the common case this - // will make old comparison dead. - BI->setCondition(Cond); - DeadInsts.push_back(OrigCond); + // In the following deletions, PN may become dead and may be deleted. + // Use a WeakVH to observe whether this happens. + WeakVH WeakPH = PN; - ++NumLFTR; - Changed = true; - return Cond; + // Delete the old floating point exit comparison. The branch starts using the + // new comparison. + NewCompare->takeName(Compare); + Compare->replaceAllUsesWith(NewCompare); + RecursivelyDeleteTriviallyDeadInstructions(Compare); + + // Delete the old floating point increment. + Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); + RecursivelyDeleteTriviallyDeadInstructions(Incr); + + // If the FP induction variable still has uses, this is because something else + // in the loop uses its value. In order to canonicalize the induction + // variable, we chose to eliminate the IV and rewrite it in terms of an + // int->fp cast. + // + // We give preference to sitofp over uitofp because it is faster on most + // platforms. + if (WeakPH) { + Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", + PN->getParent()->getFirstNonPHI()); + PN->replaceAllUsesWith(Conv); + RecursivelyDeleteTriviallyDeadInstructions(PN); + } + + // Add a new IVUsers entry for the newly-created integer PHI. + if (IU) + IU->AddUsersIfInteresting(NewPHI); } +void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { + // First step. Check to see if there are any floating-point recurrences. + // If there are, change them into integer recurrences, permitting analysis by + // the SCEV routines. + // + BasicBlock *Header = L->getHeader(); + + SmallVector<WeakVH, 8> PHIs; + for (BasicBlock::iterator I = Header->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + PHIs.push_back(PN); + + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) + if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) + HandleFloatingPointIV(L, PN); + + // If the loop previously had floating-point IV, ScalarEvolution + // may not have been able to compute a trip count. Now that we've done some + // re-writing, the trip count may be computable. + if (Changed) + SE->forgetLoop(L); +} + +//===----------------------------------------------------------------------===// +// RewriteLoopExitValues - Optimize IV users outside the loop. +// As a side effect, reduces the amount of IV processing within the loop. +//===----------------------------------------------------------------------===// + /// RewriteLoopExitValues - Check to see if this loop has a computable /// loop-invariant execution count. If so, this means that we can compute the /// final value of any expressions that are recurrent in the loop, and @@ -467,28 +580,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } -void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { - // First step. Check to see if there are any floating-point recurrences. - // If there are, change them into integer recurrences, permitting analysis by - // the SCEV routines. - // - BasicBlock *Header = L->getHeader(); - - SmallVector<WeakVH, 8> PHIs; - for (BasicBlock::iterator I = Header->begin(); - PHINode *PN = dyn_cast<PHINode>(I); ++I) - PHIs.push_back(PN); - - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i])) - HandleFloatingPointIV(L, PN); - - // If the loop previously had floating-point IV, ScalarEvolution - // may not have been able to compute a trip count. Now that we've done some - // re-writing, the trip count may be computable. - if (Changed) - SE->forgetLoop(L); -} +//===----------------------------------------------------------------------===// +// Rewrite IV users based on a canonical IV. +// To be replaced by -disable-iv-rewrite. +//===----------------------------------------------------------------------===// /// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this /// loop. IVUsers is treated as a worklist. Each successive simplification may @@ -520,6 +615,133 @@ void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { } } +// FIXME: It is an extremely bad idea to indvar substitute anything more +// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { + // Loop-invariant values are safe. + if (SE->isLoopInvariant(S, L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L, SE)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L, SE); + + // A udiv is safe if its operands are. + if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L, SE) && + isSafe(UD->getRHS(), L, SE); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + +void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { + // Rewrite all induction variable expressions in terms of the canonical + // induction variable. + // + // If there were induction variables of other sizes or offsets, manually + // add the offsets to the primary induction variable and cast, avoiding + // the need for the code evaluation methods to insert induction variables + // of different sizes. + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (SE->isLoopInvariant(ExitVal, L)) + AR = ExitVal; + } + + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!isSafe(AR, L, SE)) + continue; + + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. + Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } + // Inform ScalarEvolution that this value is changing. The change doesn't + // affect its value, but it does potentially affect which use lists the + // value will be on after the replacement, which affects ScalarEvolution's + // ability to walk use lists and drop dangling pointers when a value is + // deleted. + SE->forgetValue(User); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) + NewValI->setDebugLoc(User->getDebugLoc()); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); + } +} + +//===----------------------------------------------------------------------===// +// IV Widening - Extend the width of an IV to cover its widest uses. +//===----------------------------------------------------------------------===// + namespace { // Collect information about induction variables that are used by sign/zero // extend operations. This information is recorded by CollectExtend and @@ -608,6 +830,8 @@ protected: Instruction *NarrowDef, Instruction *WideDef); + const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, Instruction *WideDef); @@ -704,6 +928,33 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos, return true; } +// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' +// perspective after widening it's type? In other words, can the extend be +// safely hoisted out of the loop with SCEV reducing the value to a recurrence +// on the same loop. If so, return the sign or zero extended +// recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { + if (!SE->isSCEVable(NarrowUse->getType())) + return 0; + + const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); + if (SE->getTypeSizeInBits(NarrowExpr->getType()) + >= SE->getTypeSizeInBits(WideType)) { + // NarrowUse implicitly widens its operand. e.g. a gep with a narrow + // index. So don't follow this use. + return 0; + } + + const SCEV *WideExpr = IsSigned ? + SE->getSignExtendExpr(NarrowExpr, WideType) : + SE->getZeroExtendExpr(NarrowExpr, WideType); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return 0; + + return AddRec; +} + /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, @@ -753,24 +1004,7 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef, } // Does this user itself evaluate to a recurrence after widening? - const SCEVAddRecExpr *WideAddRec = 0; - if (SE->isSCEVable(NarrowUse->getType())) { - const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); - if (SE->getTypeSizeInBits(NarrowExpr->getType()) - >= SE->getTypeSizeInBits(WideType)) { - // NarrowUse implicitly widens its operand. e.g. a gep with a narrow - // index. We have already extended the operand, so we're done. - return 0; - } - const SCEV *WideExpr = IsSigned ? - SE->getSignExtendExpr(NarrowExpr, WideType) : - SE->getZeroExtendExpr(NarrowExpr, WideType); - - // Only widen past values that evaluate to a recurrence in the same loop. - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); - if (AddRec && AddRec->getLoop() == L) - WideAddRec = AddRec; - } + const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse); if (!WideAddRec) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, @@ -911,6 +1145,10 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { return WidePhi; } +//===----------------------------------------------------------------------===// +// Simplification of IV users based on SCEV evaluation. +//===----------------------------------------------------------------------===// + void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { unsigned IVOperIdx = 0; ICmpInst::Predicate Pred = ICmp->getPredicate(); @@ -1057,7 +1295,7 @@ static void pushIVUsers( /// This is similar to IVUsers' isInsteresting() but processes each instruction /// non-recursively when the operand is already known to be a simpleIVUser. /// -bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) { +static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { if (!SE->isSCEVable(I->getType())) return false; @@ -1117,7 +1355,7 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { // Instructions processed by SimplifyIVUsers for CurrIV. SmallPtrSet<Instruction*,16> Simplified; - // Use-def pairs if IVUsers waiting to be processed for CurrIV. + // Use-def pairs if IV users waiting to be processed for CurrIV. SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers; // Push users of the current LoopPhi. In rare cases, pushIVUsers may be @@ -1142,7 +1380,7 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { } continue; } - if (isSimpleIVUser(UseInst, L)) { + if (isSimpleIVUser(UseInst, L, SE)) { pushIVUsers(UseInst, Simplified, SimpleIVUsers); } } @@ -1163,6 +1401,292 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) { } } +/// SimplifyCongruentIVs - Check for congruent phis in this loop header and +/// populate ExprToIVMap for use later. +/// +void IndVarSimplify::SimplifyCongruentIVs(Loop *L) { + DenseMap<const SCEV *, PHINode *> ExprToIVMap; + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { + PHINode *Phi = cast<PHINode>(I); + if (!SE->isSCEVable(Phi->getType())) + continue; + + const SCEV *S = SE->getSCEV(Phi); + DenseMap<const SCEV *, PHINode *>::const_iterator Pos; + bool Inserted; + tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi)); + if (Inserted) + continue; + PHINode *OrigPhi = Pos->second; + // Replacing the congruent phi is sufficient because acyclic redundancy + // elimination, CSE/GVN, should handle the rest. However, once SCEV proves + // that a phi is congruent, it's almost certain to be the head of an IV + // user cycle that is isomorphic with the original phi. So it's worth + // eagerly cleaning up the common case of a single IV increment. + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *OrigInc = + cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock)); + Instruction *IsomorphicInc = + cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); + if (OrigInc != IsomorphicInc && + SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) && + HoistStep(OrigInc, IsomorphicInc, DT)) { + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: " + << *IsomorphicInc << '\n'); + IsomorphicInc->replaceAllUsesWith(OrigInc); + DeadInsts.push_back(IsomorphicInc); + } + } + DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); + ++NumElimIV; + Phi->replaceAllUsesWith(OrigPhi); + DeadInsts.push_back(Phi); + } +} + +//===----------------------------------------------------------------------===// +// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +//===----------------------------------------------------------------------===// + +/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken +/// count expression can be safely and cheaply expanded into an instruction +/// sequence that can be used by LinearFunctionTestReplace. +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || + BackedgeTakenCount->isZero()) + return false; + + if (!L->getExitingBlock()) + return false; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return false; + + // Special case: If the backedge-taken count is a UDiv, it's very likely a + // UDiv that ScalarEvolution produced in order to compute a precise + // expression, rather than a UDiv from the user's code. If we can't find a + // UDiv in the code with some simple searching, assume the former and forego + // rewriting the loop. + if (isa<SCEVUDivExpr>(BackedgeTakenCount)) { + ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!OrigCond) return false; + const SCEV *R = SE->getSCEV(OrigCond->getOperand(1)); + R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1)); + if (R != BackedgeTakenCount) { + const SCEV *L = SE->getSCEV(OrigCond->getOperand(0)); + L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1)); + if (L != BackedgeTakenCount) + return false; + } + } + return true; +} + +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary if LFTR does not force a canonical IV. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + +/// LinearFunctionTestReplace - This method rewrites the exit condition of the +/// loop to be a canonical != comparison against the incremented loop induction +/// variable. This pass is able to rewrite the exit tests of any loop where the +/// SCEV analysis can determine a loop-invariant trip count of the loop, which +/// is actually a much broader range than just linear tests. +ICmpInst *IndVarSimplify:: +LinearFunctionTestReplace(Loop *L, + const SCEV *BackedgeTakenCount, + PHINode *IndVar, + SCEVExpander &Rewriter) { + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); + BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); + + // If the exiting block is not the same as the backedge block, we must compare + // against the preincremented value, otherwise we prefer to compare against + // the post-incremented value. + Value *CmpIndVar; + const SCEV *RHS = BackedgeTakenCount; + if (L->getExitingBlock() == L->getLoopLatch()) { + // Add one to the "backedge-taken" count to get the trip count. + // If this addition may overflow, we have to be more pessimistic and + // cast the induction variable before doing the add. + const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0); + const SCEV *N = + SE->getAddExpr(BackedgeTakenCount, + SE->getConstant(BackedgeTakenCount->getType(), 1)); + if ((isa<SCEVConstant>(N) && !N->isZero()) || + SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { + // No overflow. Cast the sum. + RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType()); + } else { + // Potential overflow. Cast before doing the add. + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + RHS = SE->getAddExpr(RHS, + SE->getConstant(IndVar->getType(), 1)); + } + + // The BackedgeTaken expression contains the number of times that the + // backedge branches to the loop header. This is one less than the + // number of times the loop executes, so use the incremented indvar. + CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); + } else { + // We have to use the preincremented value... + RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount, + IndVar->getType()); + CmpIndVar = IndVar; + } + + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(RHS, L) && + "Computed iteration count is not loop invariant!"); + Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI); + + // Insert a new icmp_ne or icmp_eq instruction before the branch. + ICmpInst::Predicate Opcode; + if (L->contains(BI->getSuccessor(0))) + Opcode = ICmpInst::ICMP_NE; + else + Opcode = ICmpInst::ICMP_EQ; + + DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n" + << " LHS:" << *CmpIndVar << '\n' + << " op:\t" + << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" + << " RHS:\t" << *RHS << "\n"); + + ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); + Cond->setDebugLoc(BI->getDebugLoc()); + Value *OrigCond = BI->getCondition(); + // It's tempting to use replaceAllUsesWith here to fully replace the old + // comparison, but that's not immediately safe, since users of the old + // comparison may not be dominated by the new comparison. Instead, just + // update the branch to use the new comparison; in the common case this + // will make old comparison dead. + BI->setCondition(Cond); + DeadInsts.push_back(OrigCond); + + ++NumLFTR; + Changed = true; + return Cond; +} + +//===----------------------------------------------------------------------===// +// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. +//===----------------------------------------------------------------------===// + +/// If there's a single exit block, sink any loop-invariant values that +/// were defined in the preheader but not used inside the loop into the +/// exit block to reduce register pressure in the loop. +void IndVarSimplify::SinkUnusedInvariants(Loop *L) { + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) return; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) return; + + Instruction *InsertPt = ExitBlock->getFirstNonPHI(); + BasicBlock::iterator I = Preheader->getTerminator(); + while (I != Preheader->begin()) { + --I; + // New instructions were inserted at the end of the preheader. + if (isa<PHINode>(I)) + break; + + // Don't move instructions which might have side effects, since the side + // effects need to complete before instructions inside the loop. Also don't + // move instructions which might read memory, since the loop may modify + // memory. Note that it's okay if the instruction might have undefined + // behavior: LoopSimplify guarantees that the preheader dominates the exit + // block. + if (I->mayHaveSideEffects() || I->mayReadFromMemory()) + continue; + + // Skip debug info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + continue; + + // Don't sink static AllocaInsts out of the entry block, which would + // turn them into dynamic allocas! + if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) + if (AI->isStaticAlloca()) + continue; + + // Determine if there is a use in or before the loop (direct or + // otherwise). + bool UsedInLoop = false; + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + User *U = *UI; + BasicBlock *UseBB = cast<Instruction>(U)->getParent(); + if (PHINode *P = dyn_cast<PHINode>(U)) { + unsigned i = + PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); + UseBB = P->getIncomingBlock(i); + } + if (UseBB == Preheader || L->contains(UseBB)) { + UsedInLoop = true; + break; + } + } + + // If there is, the def must remain in the preheader. + if (UsedInLoop) + continue; + + // Otherwise, sink it to the exit block. + Instruction *ToMove = I; + bool Done = false; + + if (I != Preheader->begin()) { + // Skip debug info intrinsics. + do { + --I; + } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); + + if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) + Done = true; + } else { + Done = true; + } + + ToMove->moveBefore(InsertPt); + if (Done) break; + InsertPt = ToMove; + } +} + +//===----------------------------------------------------------------------===// +// IndVarSimplify driver. Manage several subpasses of IV simplification. +//===----------------------------------------------------------------------===// + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If LoopSimplify form is not available, stay out of trouble. Some notes: // - LSR currently only supports LoopSimplify-form loops. Indvars' @@ -1218,6 +1742,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!DisableIVRewrite) SimplifyIVUsers(Rewriter); + // Eliminate redundant IV cycles. + if (DisableIVRewrite) + SimplifyCongruentIVs(L); + // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; @@ -1295,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); - NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, - Rewriter); + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead any + // pass that uses the SCEVExpander must do it. This does not work well for + // loop passes because SCEVExpander makes assumptions about all loops, while + // LoopPassManager only forces the current loop to be simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + NewICmp = + LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } // Rewrite IV-derived expressions. if (!DisableIVRewrite) @@ -1331,431 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!"); return Changed; } - -// FIXME: It is an extremely bad idea to indvar substitute anything more -// complex than affine induction variables. Doing so will put expensive -// polynomial evaluations inside of the loop, and the str reduction pass -// currently can only reduce affine polynomials. For now just disable -// indvar subst on anything more complex than an affine addrec, unless -// it can be expanded to a trivial value. -static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { - // Loop-invariant values are safe. - if (SE->isLoopInvariant(S, L)) return true; - - // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how - // to transform them into efficient code. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - return AR->isAffine(); - - // An add is safe it all its operands are safe. - if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { - for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), - E = Commutative->op_end(); I != E; ++I) - if (!isSafe(*I, L, SE)) return false; - return true; - } - - // A cast is safe if its operand is. - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) - return isSafe(C->getOperand(), L, SE); - - // A udiv is safe if its operands are. - if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) - return isSafe(UD->getLHS(), L, SE) && - isSafe(UD->getRHS(), L, SE); - - // SCEVUnknown is always safe. - if (isa<SCEVUnknown>(S)) - return true; - - // Nothing else is safe. - return false; -} - -void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - // Rewrite all induction variable expressions in terms of the canonical - // induction variable. - // - // If there were induction variables of other sizes or offsets, manually - // add the offsets to the primary induction variable and cast, avoiding - // the need for the code evaluation methods to insert induction variables - // of different sizes. - for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. - if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (SE->isLoopInvariant(ExitVal, L)) - AR = ExitVal; - } - - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!isSafe(AR, L, SE)) - continue; - - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - - if (!isValidRewrite(Op, NewVal)) { - DeadInsts.push_back(NewVal); - continue; - } - // Inform ScalarEvolution that this value is changing. The change doesn't - // affect its value, but it does potentially affect which use lists the - // value will be on after the replacement, which affects ScalarEvolution's - // ability to walk use lists and drop dangling pointers when a value is - // deleted. - SE->forgetValue(User); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) - NewValI->setDebugLoc(User->getDebugLoc()); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } -} - -/// If there's a single exit block, sink any loop-invariant values that -/// were defined in the preheader but not used inside the loop into the -/// exit block to reduce register pressure in the loop. -void IndVarSimplify::SinkUnusedInvariants(Loop *L) { - BasicBlock *ExitBlock = L->getExitBlock(); - if (!ExitBlock) return; - - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) return; - - Instruction *InsertPt = ExitBlock->getFirstNonPHI(); - BasicBlock::iterator I = Preheader->getTerminator(); - while (I != Preheader->begin()) { - --I; - // New instructions were inserted at the end of the preheader. - if (isa<PHINode>(I)) - break; - - // Don't move instructions which might have side effects, since the side - // effects need to complete before instructions inside the loop. Also don't - // move instructions which might read memory, since the loop may modify - // memory. Note that it's okay if the instruction might have undefined - // behavior: LoopSimplify guarantees that the preheader dominates the exit - // block. - if (I->mayHaveSideEffects() || I->mayReadFromMemory()) - continue; - - // Skip debug info intrinsics. - if (isa<DbgInfoIntrinsic>(I)) - continue; - - // Don't sink static AllocaInsts out of the entry block, which would - // turn them into dynamic allocas! - if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) - if (AI->isStaticAlloca()) - continue; - - // Determine if there is a use in or before the loop (direct or - // otherwise). - bool UsedInLoop = false; - for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - User *U = *UI; - BasicBlock *UseBB = cast<Instruction>(U)->getParent(); - if (PHINode *P = dyn_cast<PHINode>(U)) { - unsigned i = - PHINode::getIncomingValueNumForOperand(UI.getOperandNo()); - UseBB = P->getIncomingBlock(i); - } - if (UseBB == Preheader || L->contains(UseBB)) { - UsedInLoop = true; - break; - } - } - - // If there is, the def must remain in the preheader. - if (UsedInLoop) - continue; - - // Otherwise, sink it to the exit block. - Instruction *ToMove = I; - bool Done = false; - - if (I != Preheader->begin()) { - // Skip debug info intrinsics. - do { - --I; - } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin()); - - if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin()) - Done = true; - } else { - Done = true; - } - - ToMove->moveBefore(InsertPt); - if (Done) break; - InsertPt = ToMove; - } -} - -/// ConvertToSInt - Convert APF to an integer, if possible. -static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { - bool isExact = false; - if (&APF.getSemantics() == &APFloat::PPCDoubleDouble) - return false; - // See if we can convert this to an int64_t - uint64_t UIntVal; - if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero, - &isExact) != APFloat::opOK || !isExact) - return false; - IntVal = UIntVal; - return true; -} - -/// HandleFloatingPointIV - If the loop has floating induction variable -/// then insert corresponding integer induction variable if possible. -/// For example, -/// for(double i = 0; i < 10000; ++i) -/// bar(i) -/// is converted into -/// for(int i = 0; i < 10000; ++i) -/// bar((double)i); -/// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { - unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); - unsigned BackEdge = IncomingEdge^1; - - // Check incoming value. - ConstantFP *InitValueVal = - dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); - - int64_t InitValue; - if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue)) - return; - - // Check IV increment. Reject this PN if increment operation is not - // an add or increment value can not be represented by an integer. - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - - // If this is not an add of the PHI with a constantfp, or if the constant fp - // is not an integer, bail out. - ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1)); - int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || - !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) - return; - - // Check Incr uses. One user is PN and the other user is an exit condition - // used by the conditional terminator. - Value::use_iterator IncrUse = Incr->use_begin(); - Instruction *U1 = cast<Instruction>(*IncrUse++); - if (IncrUse == Incr->use_end()) return; - Instruction *U2 = cast<Instruction>(*IncrUse++); - if (IncrUse != Incr->use_end()) return; - - // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't - // only used by a branch, we can't transform it. - FCmpInst *Compare = dyn_cast<FCmpInst>(U1); - if (!Compare) - Compare = dyn_cast<FCmpInst>(U2); - if (Compare == 0 || !Compare->hasOneUse() || - !isa<BranchInst>(Compare->use_back())) - return; - - BranchInst *TheBr = cast<BranchInst>(Compare->use_back()); - - // We need to verify that the branch actually controls the iteration count - // of the loop. If not, the new IV can overflow and no one will notice. - // The branch block must be in the loop and one of the successors must be out - // of the loop. - assert(TheBr->isConditional() && "Can't use fcmp if not conditional"); - if (!L->contains(TheBr->getParent()) || - (L->contains(TheBr->getSuccessor(0)) && - L->contains(TheBr->getSuccessor(1)))) - return; - - - // If it isn't a comparison with an integer-as-fp (the exit value), we can't - // transform it. - ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1)); - int64_t ExitValue; - if (ExitValueVal == 0 || - !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) - return; - - // Find new predicate for integer comparison. - CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; - switch (Compare->getPredicate()) { - default: return; // Unknown comparison. - case CmpInst::FCMP_OEQ: - case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break; - case CmpInst::FCMP_ONE: - case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break; - case CmpInst::FCMP_OGT: - case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break; - case CmpInst::FCMP_OGE: - case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break; - case CmpInst::FCMP_OLT: - case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break; - case CmpInst::FCMP_OLE: - case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; - } - - // We convert the floating point induction variable to a signed i32 value if - // we can. This is only safe if the comparison will not overflow in a way - // that won't be trapped by the integer equivalent operations. Check for this - // now. - // TODO: We could use i64 if it is native and the range requires it. - - // The start/stride/exit values must all fit in signed i32. - if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) - return; - - // If not actually striding (add x, 0.0), avoid touching the code. - if (IncValue == 0) - return; - - // Positive and negative strides have different safety conditions. - if (IncValue > 0) { - // If we have a positive stride, we require the init to be less than the - // exit value and an equality or less than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) - return; - - uint32_t Range = uint32_t(ExitValue-InitValue); - if (NewPred == CmpInst::ICMP_SLE) { - // Normalize SLE -> SLT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) - return; - - } else { - // If we have a negative stride, we require the init to be greater than the - // exit value and an equality or greater than comparison. - if (InitValue >= ExitValue || - NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) - return; - - uint32_t Range = uint32_t(InitValue-ExitValue); - if (NewPred == CmpInst::ICMP_SGE) { - // Normalize SGE -> SGT, check for infinite loop. - if (++Range == 0) return; // Range overflows. - } - - unsigned Leftover = Range % uint32_t(-IncValue); - - // If this is an equality comparison, we require that the strided value - // exactly land on the exit value, otherwise the IV condition will wrap - // around and do things the fp IV wouldn't. - if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && - Leftover != 0) - return; - - // If the stride would wrap around the i32 before exiting, we can't - // transform the IV. - if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) - return; - } - - const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); - - // Insert new integer induction variable. - PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); - NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), - PN->getIncomingBlock(IncomingEdge)); - - Value *NewAdd = - BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue), - Incr->getName()+".int", Incr); - NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge)); - - ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd, - ConstantInt::get(Int32Ty, ExitValue), - Compare->getName()); - - // In the following deletions, PN may become dead and may be deleted. - // Use a WeakVH to observe whether this happens. - WeakVH WeakPH = PN; - - // Delete the old floating point exit comparison. The branch starts using the - // new comparison. - NewCompare->takeName(Compare); - Compare->replaceAllUsesWith(NewCompare); - RecursivelyDeleteTriviallyDeadInstructions(Compare); - - // Delete the old floating point increment. - Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); - RecursivelyDeleteTriviallyDeadInstructions(Incr); - - // If the FP induction variable still has uses, this is because something else - // in the loop uses its value. In order to canonicalize the induction - // variable, we chose to eliminate the IV and rewrite it in terms of an - // int->fp cast. - // - // We give preference to sitofp over uitofp because it is faster on most - // platforms. - if (WeakPH) { - Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstNonPHI()); - PN->replaceAllUsesWith(Conv); - RecursivelyDeleteTriviallyDeadInstructions(PN); - } - - // Add a new IVUsers entry for the newly-created integer PHI. - if (IU) - IU->AddUsersIfInteresting(NewPHI); -} diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 13bd022..66add6c 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } /// Hoist expressions out of the specified loop. Note, alias info for inner -/// loop is not preserved so it is not a good idea to run LICM multiple +/// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. /// bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // What if InnerLoop was modified by other passes ? CurAST->add(*InnerAST); - + // Once we've incorporated the inner loop's AST into ours, we don't need the // subloop's anymore. delete InnerAST; LoopToAliasSetMap.erase(InnerL); } - + CurLoop = L; // Get the preheader block to move instructions into... @@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { I != E; ++I) PromoteAliasSet(*I); } - + // Clear out loops state information for the next iteration CurLoop = 0; Preheader = 0; @@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) { for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) { Instruction &I = *--II; - + // If the instruction is dead, we would try to sink it because it isn't used // in the loop, instead, just delete it. if (isInstructionTriviallyDead(&I)) { @@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) { I.eraseFromParent(); continue; } - + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. @@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; - + // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; if (LI->getType()->isSized()) @@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) { } return; } - + if (ExitBlocks.empty()) { // The instruction is actually dead if there ARE NO exit blocks. CurAST->deleteValue(&I); @@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the // hard work of inserting PHI nodes as necessary. SmallVector<PHINode*, 8> NewPHIs; SSAUpdater SSA(&NewPHIs); - + if (!I.use_empty()) SSA.Initialize(I.getType(), I.getName()); - + // Insert a copy of the instruction in each exit block of the loop that is // dominated by the instruction. Each exit block is known to only be in the // ExitBlocks list once. BasicBlock *InstOrigBB = I.getParent(); unsigned NumInserted = 0; - + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; - + if (!DT->dominates(InstOrigBB, ExitBlock)) continue; - + // Insert the code after the last PHI node. BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI(); - + // If this is the first exit block processed, just move the original // instruction, otherwise clone the original instruction and insert // the copy. @@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) { New->setName(I.getName()+".le"); ExitBlock->getInstList().insert(InsertPt, New); } - + // Now that we have inserted the instruction, inform SSAUpdater. if (!I.use_empty()) SSA.AddAvailableValue(ExitBlock, New); } - + // If the instruction doesn't dominate any exit blocks, it must be dead. if (NumInserted == 0) { CurAST->deleteValue(&I); @@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) { I.eraseFromParent(); return; } - + // Next, rewrite uses of the instruction, inserting PHI nodes as needed. for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) { // Grab the use before incrementing the iterator. @@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) { ++UI; SSA.RewriteUseAfterInsertions(U); } - + // Update CurAST for NewPHIs if I had pointer type. if (I.getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) CurAST->copyValue(&I, NewPHIs[i]); - + // Finally, remove the instruction from CurAST. It is no longer in the loop. CurAST->deleteValue(&I); } @@ -606,15 +606,17 @@ namespace { SmallVectorImpl<BasicBlock*> &LoopExitBlocks; AliasSetTracker &AST; DebugLoc DL; + int Alignment; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl) - : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {} - + DebugLoc dl, int alignment) + : LoadAndStorePromoter(Insts, S), SomePtr(SP), + PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), + Alignment(alignment) {} + virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { Value *Ptr; @@ -624,7 +626,7 @@ namespace { Ptr = cast<StoreInst>(I)->getPointerOperand(); return PointerMustAliases.count(Ptr); } - + virtual void doExtraRewritesBeforeFinalDeletion() const { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told @@ -635,6 +637,7 @@ namespace { Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Instruction *InsertPos = ExitBlock->getFirstNonPHI(); StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); + NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); } } @@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) { if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) return; - + assert(!AS.empty() && "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); @@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. - // + // // It is safe to promote P if all uses are direct load/stores and if at // least one is guaranteed to be executed. bool GuaranteedToExecute = false; - + SmallVector<Instruction*, 64> LoopUses; SmallPtrSet<Value*, 4> PointerMustAliases; + // We start with an alignment of one and try to find instructions that allow + // us to prove better alignment. + unsigned Alignment = 1; + // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); - + // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return; - + for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; - + // If there is an non-load/store instruction in the loop, we can't promote // it. - if (isa<LoadInst>(Use)) + unsigned InstAlignment; + if (LoadInst *load = dyn_cast<LoadInst>(Use)) { assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken"); - else if (isa<StoreInst>(Use)) { + InstAlignment = load->getAlignment(); + } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (Use->getOperand(1) != ASIV) continue; + InstAlignment = store->getAlignment(); assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken"); } else return; // Not a load or store. - + + // If the alignment of this instruction allows us to specify a more + // restrictive (and performant) alignment and if we are sure this + // instruction will be executed, update the alignment. + // Larger is better, with the exception of 0 being the best alignment. + if ((InstAlignment > Alignment || InstAlignment == 0) + && (Alignment != 0)) + if (isSafeToExecuteUnconditionally(*Use)) { + GuaranteedToExecute = true; + Alignment = InstAlignment; + } + if (!GuaranteedToExecute) GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use); - + LoopUses.push_back(Use); } } - + // If there isn't a guaranteed-to-execute instruction, we can't promote. if (!GuaranteedToExecute) return; - + // Otherwise, this is safe to promote, lets do it! - DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); + DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n'); Changed = true; ++NumPromoted; @@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) { SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); - + // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL); - + *CurAST, DL, Alignment); + // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); + PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a7bc0e0..a0e41d9 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -173,6 +173,11 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) { bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; + // Disable loop idiom recognition if the function's name is a common idiom. + StringRef Name = L->getHeader()->getParent()->getName(); + if (Name == "memset" || Name == "memcpy") + return false; + // The trip count of the loop must be analyzable. SE = &getAnalysis<ScalarEvolution>(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index c6ca99a..509d026 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2767,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) - if (C->getValue()->getValue().isNegative() != + if (C->getValue()->isNegative() != (NewF.AM.BaseOffs < 0) && (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) .ule(abs64(NewF.AM.BaseOffs))) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bd4c2d6..7ed3db6 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -840,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { // If not, then we know we can transform this. Module *Mod = M->getParent()->getParent()->getParent(); - const Type *ArgTys[3] = { M->getRawDest()->getType(), - M->getRawSource()->getType(), - M->getLength()->getType() }; + Type *ArgTys[3] = { M->getRawDest()->getType(), + M->getRawSource()->getType(), + M->getLength()->getType() }; M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, - ArgTys, 3)); + ArgTys)); // MemDep may have over conservative information about this instruction, just // conservatively flush it from the cache. diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 89a451e..ee132d3 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1498,8 +1498,8 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { if (!RetainRVCallee) { LLVMContext &C = M->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector<const Type *> Params; + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; Params.push_back(I8X); const FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); @@ -1515,8 +1515,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { if (!AutoreleaseRVCallee) { LLVMContext &C = M->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector<const Type *> Params; + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; Params.push_back(I8X); const FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); @@ -1532,7 +1532,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { Constant *ObjCARCOpt::getReleaseCallee(Module *M) { if (!ReleaseCallee) { LLVMContext &C = M->getContext(); - std::vector<const Type *> Params; + std::vector<Type *> Params; Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1548,7 +1548,7 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) { Constant *ObjCARCOpt::getRetainCallee(Module *M) { if (!RetainCallee) { LLVMContext &C = M->getContext(); - std::vector<const Type *> Params; + std::vector<Type *> Params; Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -1564,7 +1564,7 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) { Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { if (!AutoreleaseCallee) { LLVMContext &C = M->getContext(); - std::vector<const Type *> Params; + std::vector<Type *> Params; Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C))); AttrListPtr Attributes; Attributes.addAttr(~0u, Attribute::NoUnwind); @@ -3269,9 +3269,9 @@ void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { if (!StoreStrongCallee) { LLVMContext &C = M->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - const Type *I8XX = PointerType::getUnqual(I8X); - std::vector<const Type *> Params; + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + std::vector<Type *> Params; Params.push_back(I8XX); Params.push_back(I8X); @@ -3291,8 +3291,8 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { if (!RetainAutoreleaseCallee) { LLVMContext &C = M->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector<const Type *> Params; + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; Params.push_back(I8X); const FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); @@ -3307,8 +3307,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { if (!RetainAutoreleaseRVCallee) { LLVMContext &C = M->getContext(); - const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - std::vector<const Type *> Params; + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + std::vector<Type *> Params; Params.push_back(I8X); const FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); @@ -3421,7 +3421,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, Args[1] = new BitCastInst(Args[1], I8X, "", Store); CallInst *StoreStrong = CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, array_endof(Args), "", Store); + Args, "", Store); StoreStrong->setDoesNotThrow(); StoreStrong->setDebugLoc(Store->getDebugLoc()); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index c1dfe15..e6341ae 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // because we can percolate the negate out. Watch for minint, which // cannot be positivified. if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor)) - if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) { + if (CI->isNegative() && !CI->isMinValue(true)) { Factor = ConstantInt::get(CI->getContext(), -CI->getValue()); assert(!Duplicates.count(Factor) && "Shouldn't have two constant factors, missed a canonicalize"); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 158d653..302c287 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLoopUnswitchPass(Registry); initializeLoopIdiomRecognizePass(Registry); initializeLowerAtomicPass(Registry); + initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); initializeObjCARCAliasAnalysisPass(Registry); initializeObjCARCExpandPass(Registry); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 87e364d..7d6349c 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -30,6 +30,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" @@ -1094,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) { namespace { class AllocaPromoter : public LoadAndStorePromoter { AllocaInst *AI; + DIBuilder *DIB; + SmallVector<DbgDeclareInst *, 4> DDIs; + SmallVector<DbgValueInst *, 4> DVIs; public: AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, - DbgDeclareInst *DD, DIBuilder *&DB) - : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {} + DIBuilder *DB) + : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) { // Remember which alloca we're promoting (for isInstInList). this->AI = AI; + if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) + for (Value::use_iterator UI = DebugNode->use_begin(), + E = DebugNode->use_end(); UI != E; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + DDIs.push_back(DDI); + else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI)) + DVIs.push_back(DVI); + LoadAndStorePromoter::run(Insts); AI->eraseFromParent(); + for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + DDI->eraseFromParent(); + } + for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + DVI->eraseFromParent(); + } } virtual bool isInstInList(Instruction *I, @@ -1112,6 +1134,45 @@ public: return LI->getOperand(0) == AI; return cast<StoreInst>(I)->getPointerOperand() == AI; } + + virtual void updateDebugInfo(Instruction *Inst) const { + for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, LI, *DIB); + } + for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + Instruction *DbgVal = NULL; + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + Argument *ExtendedArg = NULL; + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0)); + if (ExtendedArg) + DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0, + DIVariable(DVI->getVariable()), + SI); + else + DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVariable(DVI->getVariable()), + SI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + Instruction *DbgVal = + DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVariable(DVI->getVariable()), LI); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } + } + } }; } // end anon namespace @@ -1381,10 +1442,9 @@ bool SROA::performPromotion(Function &F) { DT = &getAnalysis<DominatorTree>(); BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function - + DIBuilder DIB(*F.getParent()); bool Changed = false; SmallVector<Instruction*, 64> Insts; - DIBuilder *DIB = 0; while (1) { Allocas.clear(); @@ -1408,11 +1468,7 @@ bool SROA::performPromotion(Function &F) { for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ++UI) Insts.push_back(cast<Instruction>(*UI)); - - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); - if (DDI && !DIB) - DIB = new DIBuilder(*AI->getParent()->getParent()->getParent()); - AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts); + AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); Insts.clear(); } } @@ -1420,10 +1476,6 @@ bool SROA::performPromotion(Function &F) { Changed = true; } - // FIXME: Is there a better way to handle the lazy initialization of DIB - // so that there doesn't need to be an explicit delete? - delete DIB; - return Changed; } diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 7e9cc80..a66b3e3 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { static void ChangeToCall(InvokeInst *II) { BasicBlock *BB = II->getParent(); SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(), - Args.end(), "", II); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 6247b03..7c415e5 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization { } // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 - const Type *ArgType = Op->getType(); + Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), - Intrinsic::cttz, &ArgType, 1); + Intrinsic::cttz, ArgType); Value *V = B.CreateCall(F, Op, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp"); diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 2df534f..204c2c6 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils Local.cpp LoopSimplify.cpp LoopUnroll.cpp + LowerExpectIntrinsic.cpp LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 561b69d..6ea831f 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, bool ModuleLevelChanges, ClonedCodeInfo *CodeInfo) { - std::vector<const Type*> ArgTypes; + std::vector<Type*> ArgTypes; // The user might be deleting arguments to the function by specifying them in // the VMap. If so, we need to not add the arguments to the arg ty vector @@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) { Ops.size(), TD); } -static DebugLoc -UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL, - LLVMContext &Ctx) { - DebugLoc NewLoc = TheCallDL; - if (MDNode *IA = InsnDL.getInlinedAt(Ctx)) - NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL, - Ctx); - - return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(), - InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx)); -} - /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for @@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); - for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { - if (I->hasMetadata()) { - if (!TheCallDL.isUnknown()) { - DebugLoc IDL = I->getDebugLoc(); - if (!IDL.isUnknown()) { - DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, - I->getContext()); - I->setDebugLoc(NewDL); - } - } else { - // The cloned instruction has dbg info but the call instruction - // does not have dbg info. Remove dbg info from cloned instruction. - I->setDebugLoc(DebugLoc()); - } - } + for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) PHIToResolve.push_back(cast<PHINode>(OldI)); - } } - // FIXME: - // FIXME: - // FIXME: Unclone all this metadata stuff. - // FIXME: - // FIXME: - // Otherwise, remap the rest of the instructions normally. - for (; I != NewBB->end(); ++I) { - if (I->hasMetadata()) { - if (!TheCallDL.isUnknown()) { - DebugLoc IDL = I->getDebugLoc(); - if (!IDL.isUnknown()) { - DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL, - I->getContext()); - I->setDebugLoc(NewDL); - } - } else { - // The cloned instruction has dbg info but the call instruction - // does not have dbg info. Remove dbg info from cloned instruction. - I->setDebugLoc(DebugLoc()); - } - } + for (; I != NewBB->end(); ++I) RemapInstruction(I, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); - } } // Defer PHI resolution until rest of function is resolved, PHI resolution diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 1046c38..a08fa35 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -15,7 +15,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Module.h" #include "llvm/DerivedTypes.h" -#include "llvm/TypeSymbolTable.h" #include "llvm/Constant.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) { return CloneModule(M, VMap); } -Module *llvm::CloneModule(const Module *M, - ValueToValueMapTy &VMap) { - // First off, we need to create the new module... +Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { + // First off, we need to create the new module. Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); - - // Copy all of the type symbol table entries over. - const TypeSymbolTable &TST = M->getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); - TI != TE; ++TI) - New->addTypeName(TI->first, TI->second); - + // Copy all of the dependent libraries over. for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) New->addLibrary(*I); @@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M, I != E; ++I) { GlobalVariable *GV = cast<GlobalVariable>(VMap[I]); if (I->hasInitializer()) - GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), - VMap, RF_None))); + GV->setInitializer(MapValue(I->getInitializer(), VMap)); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M, I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); GA->setLinkage(I->getLinkage()); - if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None))); + if (const Constant *C = I->getAliasee()) + GA->setAliasee(MapValue(C, VMap)); } // And named metadata.... @@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M, const NamedMDNode &NMD = *I; NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName()); for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) - NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, - RF_None))); + NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap)); } return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 8c133ea..0813523 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -258,7 +258,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, default: RetTy = Type::getInt16Ty(header->getContext()); break; } - std::vector<const Type*> paramTy; + std::vector<Type*> paramTy; // Add the types of the input values to the function's argument list for (Values::const_iterator i = inputs.begin(), @@ -279,7 +279,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs, } DEBUG(dbgs() << "Function type: " << *RetTy << " f("); - for (std::vector<const Type*>::iterator i = paramTy.begin(), + for (std::vector<Type*>::iterator i = paramTy.begin(), e = paramTy.end(); i != e; ++i) DEBUG(dbgs() << **i << ", "); DEBUG(dbgs() << ")\n"); @@ -403,7 +403,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, AllocaInst *Struct = 0; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector<const Type*> ArgTypes; + std::vector<Type*> ArgTypes; for (Values::iterator v = StructValues.begin(), ve = StructValues.end(); v != ve; ++v) ArgTypes.push_back((*v)->getType()); @@ -429,7 +429,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, } // Emit the call to the function - CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(), + CallInst *call = CallInst::Create(newFunction, params, NumExitBlocks > 1 ? "targetBlock" : ""); codeReplacer->getInstList().push_back(call); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 18ecd61..d5b382e 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -450,9 +450,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, NewSelector.push_back(Outer->getArgOperand(i)); CallInst *NewInner = - IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), - NewSelector.begin(), - NewSelector.end()); + IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector); // No need to copy attributes, calling convention, etc. NewInner->takeName(Inner); Inner->replaceAllUsesWith(NewInner); @@ -488,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterUnwindDest(), - InvokeArgs.begin(), InvokeArgs.end(), - CI->getName(), BB); + InvokeArgs, CI->getName(), BB); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -663,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, LLVMContext &Context = Arg->getContext(); - const Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; @@ -680,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), &*Caller->begin()->begin()); // Emit a memcpy. - const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, - Tys, 3); + Tys); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); @@ -702,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, ConstantInt::get(Type::getInt32Ty(Context), 1), ConstantInt::getFalse(Context) // isVolatile }; - IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs, CallArgs+5); + IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); // Uses of the argument in the function should use our new alloca // instead. @@ -744,6 +741,41 @@ static bool hasLifetimeMarkers(AllocaInst *AI) { return false; } +/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively +/// update InlinedAtEntry of a DebugLoc. +static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, + const DebugLoc &InlinedAtDL, + LLVMContext &Ctx) { + if (MDNode *IA = DL.getInlinedAt(Ctx)) { + DebugLoc NewInlinedAtDL + = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx); + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + NewInlinedAtDL.getAsMDNode(Ctx)); + } + + return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx), + InlinedAtDL.getAsMDNode(Ctx)); +} + + +/// fixupLineNumbers - Update inlined instructions' line numbers to +/// to encode location where these instructions are inlined. +static void fixupLineNumbers(Function *Fn, Function::iterator FI, + Instruction *TheCall) { + DebugLoc TheCallDL = TheCall->getDebugLoc(); + if (TheCallDL.isUnknown()) + return; + + for (; FI != Fn->end(); ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) + BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); + } + } +} + // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -846,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); + + // Update inlined instructions' line number information. + fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp new file mode 100644 index 0000000..c1213fa --- /dev/null +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -0,0 +1,166 @@ +#define DEBUG_TYPE "lower-expect-intrinsic" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/BasicBlock.h" +#include "llvm/LLVMContext.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +#include <vector> + +using namespace llvm; + +STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled"); + +static cl::opt<uint32_t> +LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64), + cl::desc("Weight of the branch likely to be taken (default = 64)")); +static cl::opt<uint32_t> +UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4), + cl::desc("Weight of the branch unlikely to be taken (default = 4)")); + +namespace { + + class LowerExpectIntrinsic : public FunctionPass { + + bool HandleSwitchExpect(SwitchInst *SI); + + bool HandleIfExpect(BranchInst *BI); + + public: + static char ID; + LowerExpectIntrinsic() : FunctionPass(ID) { + initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + }; +} + + +bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { + CallInst *CI = dyn_cast<CallInst>(SI->getCondition()); + if (!CI) + return false; + + Function *Fn = CI->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) + return false; + + Value *ArgValue = CI->getArgOperand(0); + ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (!ExpectedValue) + return false; + + LLVMContext &Context = CI->getContext(); + const Type *Int32Ty = Type::getInt32Ty(Context); + + unsigned caseNo = SI->findCaseValue(ExpectedValue); + std::vector<Value *> Vec; + unsigned n = SI->getNumCases(); + Vec.resize(n + 1); // +1 for MDString + + Vec[0] = MDString::get(Context, "branch_weights"); + for (unsigned i = 0; i < n; ++i) { + Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight); + } + + MDNode *WeightsNode = llvm::MDNode::get(Context, Vec); + SI->setMetadata(LLVMContext::MD_prof, WeightsNode); + + SI->setCondition(ArgValue); + return true; +} + + +bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) { + if (BI->isUnconditional()) + return false; + + // Handle non-optimized IR code like: + // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1) + // %tobool = icmp ne i64 %expval, 0 + // br i1 %tobool, label %if.then, label %if.end + + ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition()); + if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE) + return false; + + CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0)); + if (!CI) + return false; + + Function *Fn = CI->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect) + return false; + + Value *ArgValue = CI->getArgOperand(0); + ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (!ExpectedValue) + return false; + + LLVMContext &Context = CI->getContext(); + const Type *Int32Ty = Type::getInt32Ty(Context); + bool Likely = ExpectedValue->isOne(); + + // If expect value is equal to 1 it means that we are more likely to take + // branch 0, in other case more likely is branch 1. + Value *Ops[] = { + MDString::get(Context, "branch_weights"), + ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight), + ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight) + }; + + MDNode *WeightsNode = MDNode::get(Context, Ops); + BI->setMetadata(LLVMContext::MD_prof, WeightsNode); + + CmpI->setOperand(0, ArgValue); + return true; +} + + +bool LowerExpectIntrinsic::runOnFunction(Function &F) { + for (Function::iterator I = F.begin(), E = F.end(); I != E;) { + BasicBlock *BB = I++; + + // Create "block_weights" metadata. + if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) { + if (HandleIfExpect(BI)) + IfHandled++; + } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { + if (HandleSwitchExpect(SI)) + IfHandled++; + } + + // remove llvm.expect intrinsics. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ) { + CallInst *CI = dyn_cast<CallInst>(BI++); + if (!CI) + continue; + + Function *Fn = CI->getCalledFunction(); + if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) { + Value *Exp = CI->getArgOperand(0); + CI->replaceAllUsesWith(Exp); + CI->eraseFromParent(); + } + } + } + + return false; +} + + +char LowerExpectIntrinsic::ID = 0; +INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' " + "Intrinsics", false, false) + +FunctionPass *llvm::createLowerExpectIntrinsicPass() { + return new LowerExpectIntrinsic(); +} diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 025ae0d..f77d19d 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -66,7 +66,7 @@ namespace { Constant *AbortFn; // Used for expensive EH support. - const Type *JBLinkTy; + StructType *JBLinkTy; GlobalVariable *JBListHead; Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn; bool useExpensiveEHSupport; @@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, // doInitialization - Make sure that there is a prototype for abort in the // current module. bool LowerInvoke::doInitialization(Module &M) { - const Type *VoidPtrTy = - Type::getInt8PtrTy(M.getContext()); + const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; JBSize = JBSize ? JBSize : 200; - const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); - - { // The type is recursive, so use a type holder. - std::vector<const Type*> Elements; - Elements.push_back(JmpBufTy); - OpaqueType *OT = OpaqueType::get(M.getContext()); - Elements.push_back(PointerType::getUnqual(OT)); - PATypeHolder JBLType(StructType::get(M.getContext(), Elements)); - OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle. - JBLinkTy = JBLType.get(); - M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); - } + Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize); + + JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty"); + Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) }; + JBLinkTy->setBody(Elts); const Type *PtrJBList = PointerType::getUnqual(JBLinkTy); @@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs.begin(), CallArgs.end(), - "",II); + CallArgs, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, // Insert a normal call instruction. SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), - CallArgs.begin(), CallArgs.end(), "", - II); + CallArgs, "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { Type::getInt8PtrTy(F.getContext()), "tmp", UnwindBlock); Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1); - CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock); + CallInst::Create(LongJmpFn, Idx, "", UnwindBlock); new UnreachableInst(F.getContext(), UnwindBlock); // Set up the term block ("throw without a catch"). diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index b336194..b47a7cc 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -16,7 +16,6 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" @@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { LoadAndStorePromoter:: LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts, - SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB, - StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) { + SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; Value *SomeVal; @@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { if (BlockUses.size() == 1) { // If it is a store, it is a trivial def of the value in the block. if (StoreInst *SI = dyn_cast<StoreInst>(User)) { - if (DDI) - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + updateDebugInfo(SI); SSA.AddAvailableValue(BB, SI->getOperand(0)); } else // Otherwise it is a load, queue it to rewrite as a live-in load. @@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { if (StoreInst *SI = dyn_cast<StoreInst>(II)) { // If this is a store to an unrelated pointer, ignore it. if (!isInstInList(SI, Insts)) continue; - - if (DDI) - ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + updateDebugInfo(SI); // Remember that this is the active value in the block. StoredValue = SI->getOperand(0); @@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const { instructionDeleted(User); User->eraseFromParent(); } - - if (DDI) - DDI->eraseFromParent(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 49726d5..9d9c324 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) { SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args.begin(), Args.end(), - II->getName()); + Args, II->getName()); CI->setCallingConv(II->getCallingConv()); CI->setAttributes(II->getAttributes()); // If the invoke produced a value, the Call now does instead. @@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args.begin(), Args.end(), - II->getName()); + Args, II->getName()); CI->setCallingConv(II->getCallingConv()); CI->setAttributes(II->getAttributes()); // If the invoke produced a value, the call does now instead. diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index de6cbdc..973b105 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,16 +13,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/InlineAsm.h" #include "llvm/Instructions.h" #include "llvm/Metadata.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, - RemapFlags Flags) { +// Out of line method to get vtable etc for class. +void ValueMapTypeRemapper::Anchor() {} + +Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper) { ValueToValueMapTy::iterator I = VM.find(V); // If the value already exists in the map, use it. @@ -30,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, // Global values do not need to be seeded into the VM if they // are using the identity mapping. - if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V)) + if (isa<GlobalValue>(V) || isa<MDString>(V)) return VM[V] = const_cast<Value*>(V); + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + // Inline asm may need *type* remapping. + FunctionType *NewTy = IA->getFunctionType(); + if (TypeMapper) { + NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy)); + + if (NewTy != IA->getFunctionType()) + V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(), + IA->hasSideEffects(), IA->isAlignStack()); + } + + return VM[V] = const_cast<Value*>(V); + } + if (const MDNode *MD = dyn_cast<MDNode>(V)) { // If this is a module-level metadata and we know that nothing at the module @@ -46,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue; + if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue; // Ok, at least one operand needs remapping. SmallVector<Value*, 4> Elts; Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0); + Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0); } MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); @@ -76,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, return 0; if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { - Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags)); + Function *F = + cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper)); BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM, - Flags)); + Flags, TypeMapper)); return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock()); } - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - Value *Op = C->getOperand(i); - Value *Mapped = MapValue(Op, VM, Flags); - if (Mapped == C) continue; - - // Okay, the operands don't all match. We've already processed some or all - // of the operands, set them up now. - std::vector<Constant*> Ops; - Ops.reserve(C->getNumOperands()); - for (unsigned j = 0; j != i; ++j) - Ops.push_back(cast<Constant>(C->getOperand(i))); + // Otherwise, we have some other constant to remap. Start by checking to see + // if all operands have an identity remapping. + unsigned OpNo = 0, NumOperands = C->getNumOperands(); + Value *Mapped = 0; + for (; OpNo != NumOperands; ++OpNo) { + Value *Op = C->getOperand(OpNo); + Mapped = MapValue(Op, VM, Flags, TypeMapper); + if (Mapped != C) break; + } + + // See if the type mapper wants to remap the type as well. + Type *NewTy = C->getType(); + if (TypeMapper) + NewTy = TypeMapper->remapType(NewTy); + + // If the result type and all operands match up, then just insert an identity + // mapping. + if (OpNo == NumOperands && NewTy == C->getType()) + return VM[V] = C; + + // Okay, we need to create a new constant. We've already processed some or + // all of the operands, set them all up now. + SmallVector<Constant*, 8> Ops; + Ops.reserve(NumOperands); + for (unsigned j = 0; j != OpNo; ++j) + Ops.push_back(cast<Constant>(C->getOperand(j))); + + // If one of the operands mismatch, push it and the other mapped operands. + if (OpNo != NumOperands) { Ops.push_back(cast<Constant>(Mapped)); - + // Map the rest of the operands that aren't processed yet. - for (++i; i != e; ++i) - Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags))); - - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) - return VM[V] = CE->getWithOperands(Ops); - if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) - return VM[V] = ConstantArray::get(CA->getType(), Ops); - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) - return VM[V] = ConstantStruct::get(CS->getType(), Ops); - assert(isa<ConstantVector>(C) && "Unknown mapped constant type"); - return VM[V] = ConstantVector::get(Ops); + for (++OpNo; OpNo != NumOperands; ++OpNo) + Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM, + Flags, TypeMapper)); } - - // If we reach here, all of the operands of the constant match. - return VM[V] = C; + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return VM[V] = CE->getWithOperands(Ops, NewTy); + if (isa<ConstantArray>(C)) + return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops); + if (isa<ConstantStruct>(C)) + return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops); + if (isa<ConstantVector>(C)) + return VM[V] = ConstantVector::get(Ops); + // If this is a no-operand constant, it must be because the type was remapped. + if (isa<UndefValue>(C)) + return VM[V] = UndefValue::get(NewTy); + if (isa<ConstantAggregateZero>(C)) + return VM[V] = ConstantAggregateZero::get(NewTy); + assert(isa<ConstantPointerNull>(C)); + return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy)); } /// RemapInstruction - Convert the instruction operands from referencing the /// current values into those specified by VMap. /// void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, - RemapFlags Flags) { + RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){ // Remap operands. for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { - Value *V = MapValue(*op, VMap, Flags); + Value *V = MapValue(*op, VMap, Flags, TypeMapper); // If we aren't ignoring missing entries, assert that something happened. if (V != 0) *op = V; @@ -142,14 +183,19 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, } } - // Remap attached metadata. + // Remap attached metadata. Don't bother remapping DebugLoc, it can never + // have mappings to do. SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; - I->getAllMetadata(MDs); + I->getAllMetadataOtherThanDebugLoc(MDs); for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) { - Value *Old = MI->second; - Value *New = MapValue(Old, VMap, Flags); + MDNode *Old = MI->second; + MDNode *New = MapValue(Old, VMap, Flags, TypeMapper); if (New != Old) - I->setMetadata(MI->first, cast<MDNode>(New)); + I->setMetadata(MI->first, New); } + + // If the instruction's type is being remapped, do so now. + if (TypeMapper) + I->mutateType(TypeMapper->remapType(I->getType())); } |