diff options
Diffstat (limited to 'lib/Transforms/Utils')
34 files changed, 3780 insertions, 1239 deletions
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp deleted file mode 100644 index 1e6586b..0000000 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ /dev/null @@ -1,577 +0,0 @@ -//===- AddrModeMatcher.cpp - Addressing mode matching facility --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements target addressing mode matcher class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/AddrModeMatcher.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalValue.h" -#include "llvm/Instruction.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CallSite.h" - -using namespace llvm; -using namespace llvm::PatternMatch; - -void ExtAddrMode::print(raw_ostream &OS) const { - bool NeedPlus = false; - OS << "["; - if (BaseGV) { - OS << (NeedPlus ? " + " : "") - << "GV:"; - WriteAsOperand(OS, BaseGV, /*PrintType=*/false); - NeedPlus = true; - } - - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; - - if (BaseReg) { - OS << (NeedPlus ? " + " : "") - << "Base:"; - WriteAsOperand(OS, BaseReg, /*PrintType=*/false); - NeedPlus = true; - } - if (Scale) { - OS << (NeedPlus ? " + " : "") - << Scale << "*"; - WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); - NeedPlus = true; - } - - OS << ']'; -} - -#ifndef NDEBUG -void ExtAddrMode::dump() const { - print(dbgs()); - dbgs() << '\n'; -} -#endif - - -/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. -/// Return true and update AddrMode if this addr mode is legal for the target, -/// false if not. -bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, - unsigned Depth) { - // If Scale is 1, then this is the same as adding ScaleReg to the addressing - // mode. Just process that directly. - if (Scale == 1) - return MatchAddr(ScaleReg, Depth); - - // If the scale is 0, it takes nothing to add this. - if (Scale == 0) - return true; - - // If we already have a scale of this value, we can add to it, otherwise, we - // need an available scale field. - if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) - return false; - - ExtAddrMode TestAddrMode = AddrMode; - - // Add scale to turn X*4+X*3 -> X*7. This could also do things like - // [A+B + A*7] -> [B+A*8]. - TestAddrMode.Scale += Scale; - TestAddrMode.ScaledReg = ScaleReg; - - // If the new address isn't legal, bail out. - if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) - return false; - - // It was legal, so commit it. - AddrMode = TestAddrMode; - - // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now - // to see if ScaleReg is actually X+C. If so, we can turn this into adding - // X*Scale + C*Scale to addr mode. - ConstantInt *CI = 0; Value *AddLHS = 0; - if (isa<Instruction>(ScaleReg) && // not a constant expr. - match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { - TestAddrMode.ScaledReg = AddLHS; - TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; - - // If this addressing mode is legal, commit it and remember that we folded - // this instruction. - if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { - AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); - AddrMode = TestAddrMode; - return true; - } - } - - // Otherwise, not (x+c)*scale, just return what we have. - return true; -} - -/// MightBeFoldableInst - This is a little filter, which returns true if an -/// addressing computation involving I might be folded into a load/store -/// accessing it. This doesn't need to be perfect, but needs to accept at least -/// the set of instructions that MatchOperationAddr can. -static bool MightBeFoldableInst(Instruction *I) { - switch (I->getOpcode()) { - case Instruction::BitCast: - // Don't touch identity bitcasts. - if (I->getType() == I->getOperand(0)->getType()) - return false; - return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); - case Instruction::PtrToInt: - // PtrToInt is always a noop, as we know that the int type is pointer sized. - return true; - case Instruction::IntToPtr: - // We know the input is intptr_t, so this is foldable. - return true; - case Instruction::Add: - return true; - case Instruction::Mul: - case Instruction::Shl: - // Can only handle X*C and X << C. - return isa<ConstantInt>(I->getOperand(1)); - case Instruction::GetElementPtr: - return true; - default: - return false; - } -} - - -/// MatchOperationAddr - Given an instruction or constant expr, see if we can -/// fold the operation into the addressing mode. If so, update the addressing -/// mode and return true, otherwise return false without modifying AddrMode. -bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, - unsigned Depth) { - // Avoid exponential behavior on extremely deep expression trees. - if (Depth >= 5) return false; - - switch (Opcode) { - case Instruction::PtrToInt: - // PtrToInt is always a noop, as we know that the int type is pointer sized. - return MatchAddr(AddrInst->getOperand(0), Depth); - case Instruction::IntToPtr: - // This inttoptr is a no-op if the integer type is pointer sized. - if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == - TLI.getPointerTy()) - return MatchAddr(AddrInst->getOperand(0), Depth); - return false; - case Instruction::BitCast: - // BitCast is always a noop, and we can handle it as long as it is - // int->int or pointer->pointer (we don't want int<->fp or something). - if ((AddrInst->getOperand(0)->getType()->isPointerTy() || - AddrInst->getOperand(0)->getType()->isIntegerTy()) && - // Don't touch identity bitcasts. These were probably put here by LSR, - // and we don't want to mess around with them. Assume it knows what it - // is doing. - AddrInst->getOperand(0)->getType() != AddrInst->getType()) - return MatchAddr(AddrInst->getOperand(0), Depth); - return false; - case Instruction::Add: { - // Check to see if we can merge in the RHS then the LHS. If so, we win. - ExtAddrMode BackupAddrMode = AddrMode; - unsigned OldSize = AddrModeInsts.size(); - if (MatchAddr(AddrInst->getOperand(1), Depth+1) && - MatchAddr(AddrInst->getOperand(0), Depth+1)) - return true; - - // Restore the old addr mode info. - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - - // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. - if (MatchAddr(AddrInst->getOperand(0), Depth+1) && - MatchAddr(AddrInst->getOperand(1), Depth+1)) - return true; - - // Otherwise we definitely can't merge the ADD in. - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - break; - } - //case Instruction::Or: - // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. - //break; - case Instruction::Mul: - case Instruction::Shl: { - // Can only handle X*C and X << C. - ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); - if (!RHS) return false; - int64_t Scale = RHS->getSExtValue(); - if (Opcode == Instruction::Shl) - Scale = 1LL << Scale; - - return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); - } - case Instruction::GetElementPtr: { - // Scan the GEP. We check it if it contains constant offsets and at most - // one variable offset. - int VariableOperand = -1; - unsigned VariableScale = 0; - - int64_t ConstantOffset = 0; - const TargetData *TD = TLI.getTargetData(); - gep_type_iterator GTI = gep_type_begin(AddrInst); - for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = TD->getStructLayout(STy); - unsigned Idx = - cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); - ConstantOffset += SL->getElementOffset(Idx); - } else { - uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { - ConstantOffset += CI->getSExtValue()*TypeSize; - } else if (TypeSize) { // Scales of zero don't do anything. - // We only allow one variable index at the moment. - if (VariableOperand != -1) - return false; - - // Remember the variable index. - VariableOperand = i; - VariableScale = TypeSize; - } - } - } - - // A common case is for the GEP to only do a constant offset. In this case, - // just add it to the disp field and check validity. - if (VariableOperand == -1) { - AddrMode.BaseOffs += ConstantOffset; - if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ - // Check to see if we can fold the base pointer in too. - if (MatchAddr(AddrInst->getOperand(0), Depth+1)) - return true; - } - AddrMode.BaseOffs -= ConstantOffset; - return false; - } - - // Save the valid addressing mode in case we can't match. - ExtAddrMode BackupAddrMode = AddrMode; - unsigned OldSize = AddrModeInsts.size(); - - // See if the scale and offset amount is valid for this target. - AddrMode.BaseOffs += ConstantOffset; - - // Match the base operand of the GEP. - if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { - // If it couldn't be matched, just stuff the value in a register. - if (AddrMode.HasBaseReg) { - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - return false; - } - AddrMode.HasBaseReg = true; - AddrMode.BaseReg = AddrInst->getOperand(0); - } - - // Match the remaining variable portion of the GEP. - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, - Depth)) { - // If it couldn't be matched, try stuffing the base into a register - // instead of matching it, and retrying the match of the scale. - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - if (AddrMode.HasBaseReg) - return false; - AddrMode.HasBaseReg = true; - AddrMode.BaseReg = AddrInst->getOperand(0); - AddrMode.BaseOffs += ConstantOffset; - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), - VariableScale, Depth)) { - // If even that didn't work, bail. - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - return false; - } - } - - return true; - } - } - return false; -} - -/// MatchAddr - If we can, try to add the value of 'Addr' into the current -/// addressing mode. If Addr can't be added to AddrMode this returns false and -/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type -/// or intptr_t for the target. -/// -bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { - if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { - // Fold in immediates if legal for the target. - AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) - return true; - AddrMode.BaseOffs -= CI->getSExtValue(); - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { - // If this is a global variable, try to fold it into the addressing mode. - if (AddrMode.BaseGV == 0) { - AddrMode.BaseGV = GV; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) - return true; - AddrMode.BaseGV = 0; - } - } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { - ExtAddrMode BackupAddrMode = AddrMode; - unsigned OldSize = AddrModeInsts.size(); - - // Check to see if it is possible to fold this operation. - if (MatchOperationAddr(I, I->getOpcode(), Depth)) { - // Okay, it's possible to fold this. Check to see if it is actually - // *profitable* to do so. We use a simple cost model to avoid increasing - // register pressure too much. - if (I->hasOneUse() || - IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { - AddrModeInsts.push_back(I); - return true; - } - - // It isn't profitable to do this, roll back. - //cerr << "NOT FOLDING: " << *I; - AddrMode = BackupAddrMode; - AddrModeInsts.resize(OldSize); - } - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { - if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) - return true; - } else if (isa<ConstantPointerNull>(Addr)) { - // Null pointer gets folded without affecting the addressing mode. - return true; - } - - // Worse case, the target should support [reg] addressing modes. :) - if (!AddrMode.HasBaseReg) { - AddrMode.HasBaseReg = true; - AddrMode.BaseReg = Addr; - // Still check for legality in case the target supports [imm] but not [i+r]. - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) - return true; - AddrMode.HasBaseReg = false; - AddrMode.BaseReg = 0; - } - - // If the base register is already taken, see if we can do [r+r]. - if (AddrMode.Scale == 0) { - AddrMode.Scale = 1; - AddrMode.ScaledReg = Addr; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) - return true; - AddrMode.Scale = 0; - AddrMode.ScaledReg = 0; - } - // Couldn't match. - return false; -} - - -/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified -/// inline asm call are due to memory operands. If so, return true, otherwise -/// return false. -static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, - const TargetLowering &TLI) { - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); - for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { - TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; - - // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); - - // If this asm operand is our Value*, and if it isn't an indirect memory - // operand, we can't fold it! - if (OpInfo.CallOperandVal == OpVal && - (OpInfo.ConstraintType != TargetLowering::C_Memory || - !OpInfo.isIndirect)) - return false; - } - - return true; -} - - -/// FindAllMemoryUses - Recursively walk all the uses of I until we find a -/// memory use. If we find an obviously non-foldable instruction, return true. -/// Add the ultimately found memory instructions to MemoryUses. -static bool FindAllMemoryUses(Instruction *I, - SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, - SmallPtrSet<Instruction*, 16> &ConsideredInsts, - const TargetLowering &TLI) { - // If we already considered this instruction, we're done. - if (!ConsideredInsts.insert(I)) - return false; - - // If this is an obviously unfoldable instruction, bail out. - if (!MightBeFoldableInst(I)) - return true; - - // Loop over all the uses, recursively processing them. - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - User *U = *UI; - - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); - continue; - } - - if (StoreInst *SI = dyn_cast<StoreInst>(U)) { - unsigned opNo = UI.getOperandNo(); - if (opNo == 0) return true; // Storing addr, not into addr. - MemoryUses.push_back(std::make_pair(SI, opNo)); - continue; - } - - if (CallInst *CI = dyn_cast<CallInst>(U)) { - InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); - if (!IA) return true; - - // If this is a memory operand, we're cool, otherwise bail out. - if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) - return true; - continue; - } - - if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts, - TLI)) - return true; - } - - return false; -} - - -/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at -/// the use site that we're folding it into. If so, there is no cost to -/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values -/// that we know are live at the instruction already. -bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, - Value *KnownLive2) { - // If Val is either of the known-live values, we know it is live! - if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) - return true; - - // All values other than instructions and arguments (e.g. constants) are live. - if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; - - // If Val is a constant sized alloca in the entry block, it is live, this is - // true because it is just a reference to the stack/frame pointer, which is - // live for the whole function. - if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) - if (AI->isStaticAlloca()) - return true; - - // Check to see if this value is already used in the memory instruction's - // block. If so, it's already live into the block at the very least, so we - // can reasonably fold it. - return Val->isUsedInBasicBlock(MemoryInst->getParent()); -} - - - -/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing -/// mode of the machine to fold the specified instruction into a load or store -/// that ultimately uses it. However, the specified instruction has multiple -/// uses. Given this, it may actually increase register pressure to fold it -/// into the load. For example, consider this code: -/// -/// X = ... -/// Y = X+1 -/// use(Y) -> nonload/store -/// Z = Y+1 -/// load Z -/// -/// In this case, Y has multiple uses, and can be folded into the load of Z -/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to -/// be live at the use(Y) line. If we don't fold Y into load Z, we use one -/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the -/// number of computations either. -/// -/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If -/// X was live across 'load Z' for other reasons, we actually *would* want to -/// fold the addressing mode in the Z case. This would make Y die earlier. -bool AddressingModeMatcher:: -IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, - ExtAddrMode &AMAfter) { - if (IgnoreProfitability) return true; - - // AMBefore is the addressing mode before this instruction was folded into it, - // and AMAfter is the addressing mode after the instruction was folded. Get - // the set of registers referenced by AMAfter and subtract out those - // referenced by AMBefore: this is the set of values which folding in this - // address extends the lifetime of. - // - // Note that there are only two potential values being referenced here, - // BaseReg and ScaleReg (global addresses are always available, as are any - // folded immediates). - Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; - - // If the BaseReg or ScaledReg was referenced by the previous addrmode, their - // lifetime wasn't extended by adding this instruction. - if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - BaseReg = 0; - if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - ScaledReg = 0; - - // If folding this instruction (and it's subexprs) didn't extend any live - // ranges, we're ok with it. - if (BaseReg == 0 && ScaledReg == 0) - return true; - - // If all uses of this instruction are ultimately load/store/inlineasm's, - // check to see if their addressing modes will include this instruction. If - // so, we can fold it into all uses, so it doesn't matter if it has multiple - // uses. - SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; - SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) - return false; // Has a non-memory, non-foldable use! - - // Now that we know that all uses of this instruction are part of a chain of - // computation involving only operations that could theoretically be folded - // into a memory use, loop over each of these uses and see if they could - // *actually* fold the instruction. - SmallVector<Instruction*, 32> MatchedAddrModeInsts; - for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { - Instruction *User = MemoryUses[i].first; - unsigned OpNo = MemoryUses[i].second; - - // Get the access type of this use. If the use isn't a pointer, we don't - // know what it accesses. - Value *Address = User->getOperand(OpNo); - if (!Address->getType()->isPointerTy()) - return false; - Type *AddressAccessTy = - cast<PointerType>(Address->getType())->getElementType(); - - // Do a match against the root of this address, ignoring profitability. This - // will tell us if the addressing mode for the memory operation will - // *actually* cover the shared instruction. - ExtAddrMode Result; - AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, - MemoryInst, Result); - Matcher.IgnoreProfitability = true; - bool Success = Matcher.MatchAddr(Address, 0); - (void)Success; assert(Success && "Couldn't select *anything*?"); - - // If the match didn't cover I, then it won't be shared by it. - if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), - I) == MatchedAddrModeInsts.end()) - return false; - - MatchedAddrModeInsts.clear(); - } - - return true; -} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 75a7817..8330e84 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -13,20 +13,20 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Constant.h" -#include "llvm/Type.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> using namespace llvm; @@ -687,3 +687,42 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, return cast<ReturnInst>(NewRet); } +/// SplitBlockAndInsertIfThen - Split the containing block at the +/// specified instruction - everything before and including Cmp stays +/// in the old basic block, and everything after Cmp is moved to a +/// new block. The two blocks are connected by a conditional branch +/// (with value of Cmp being the condition). +/// Before: +/// Head +/// Cmp +/// Tail +/// After: +/// Head +/// Cmp +/// if (Cmp) +/// ThenBlock +/// Tail +/// +/// If Unreachable is true, then ThenBlock ends with +/// UnreachableInst, otherwise it branches to Tail. +/// Returns the NewBasicBlock's terminator. + +TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp, + bool Unreachable, MDNode *BranchWeights) { + Instruction *SplitBefore = Cmp->getNextNode(); + BasicBlock *Head = SplitBefore->getParent(); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + TerminatorInst *HeadOldTerm = Head->getTerminator(); + LLVMContext &C = Head->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else + CheckTerm = BranchInst::Create(Tail, ThenBlock); + BranchInst *HeadNewTerm = + BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); + HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); + ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + return CheckTerm; +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 6b04e3d..8513772 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -17,17 +17,17 @@ #define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Type.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; STATISTIC(NumBroken, "Number of blocks inserted"); diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index e13fd71..bf540b0 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -12,17 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Intrinsics.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; @@ -34,19 +32,22 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI), + Constant *StrLen = M->getOrInsertFunction("strlen", + AttributeSet::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), NULL); @@ -61,18 +62,21 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, /// specified pointer. Ptr is required to be some pointer type, MaxLen must /// be of size_t type, and the return value has 'intptr_t' type. Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + Constant *StrNLen = M->getOrInsertFunction("strnlen", + AttributeSet::get(M->getContext(), + AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -88,17 +92,21 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); Type *I8Ptr = B.getInt8PtrTy(); Type *I32Ty = B.getInt32Ty(); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(AWI), + Constant *StrChr = M->getOrInsertFunction("strchr", + AttributeSet::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I32Ty, NULL); CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), ConstantInt::get(I32Ty, C), "strchr"); @@ -109,20 +117,23 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI), + Value *StrNCmp = M->getOrInsertFunction("strncmp", + AttributeSet::get(M->getContext(), + AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -139,17 +150,19 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI, + const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), AWI), I8Ptr, I8Ptr, I8Ptr, NULL); CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Name); @@ -161,17 +174,20 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Type *I8Ptr = B.getInt8PtrTy(); - Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI), + Value *StrNCpy = M->getOrInsertFunction(Name, + AttributeSet::get(M->getContext(), + AWI), I8Ptr, I8Ptr, I8Ptr, Len->getType(), NULL); CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B), @@ -185,17 +201,18 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); Value *MemCpy = M->getOrInsertFunction("__memcpy_chk", - AttrListPtr::get(AWI), + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -212,16 +229,19 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD, + Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI), + Value *MemChr = M->getOrInsertFunction("memchr", + AttributeSet::get(M->getContext(), AWI), B.getInt8PtrTy(), B.getInt8PtrTy(), B.getInt32Ty(), @@ -237,20 +257,22 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD, + Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + ArrayRef<Attribute::AttrKind>(AVs, 2)); LLVMContext &Context = B.GetInsertBlock()->getContext(); - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI), + Value *MemCmp = M->getOrInsertFunction("memcmp", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), B.getInt8PtrTy(), @@ -269,7 +291,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, - const AttrListPtr &Attrs) { + const AttributeSet &Attrs) { SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. @@ -294,7 +316,7 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) return 0; @@ -316,17 +338,19 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI), + Value *PutS = M->getOrInsertFunction("puts", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), NULL); @@ -339,17 +363,19 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI), + F = M->getOrInsertFunction("fputc", + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt32Ty(), File->getType(), NULL); @@ -370,19 +396,21 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { + const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FPutsName, + AttributeSet::get(M->getContext(), AWI), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); @@ -400,21 +428,23 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, + IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) return 0; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex, + Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI), + F = M->getOrInsertFunction(FWriteName, + AttributeSet::get(M->getContext(), AWI), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), @@ -436,9 +466,9 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI) { - // We really need TargetData for later. + // We really need DataLayout for later. if (!TD) return false; this->CI = CI; diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 30d60be..00cda8e 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -16,11 +16,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "bypass-slow-division" -#include "llvm/Instructions.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" using namespace llvm; @@ -221,7 +221,7 @@ static bool reuseOrInsertFastDiv(Function &F, // be profitably bypassed and carried out with a shorter, faster divide. bool llvm::bypassSlowDivision(Function &F, Function::iterator &I, - const DenseMap<Type *, Type *> &BypassTypeMap) { + const DenseMap<unsigned int, unsigned int> &BypassWidths) { DivCacheTy DivCache; bool MadeChange = false; @@ -238,14 +238,23 @@ bool llvm::bypassSlowDivision(Function &F, if (!UseDivOp && !UseRemOp) continue; - // Continue if div/rem type is not bypassed - DenseMap<Type *, Type *>::const_iterator BT = - BypassTypeMap.find(J->getType()); - if (BT == BypassTypeMap.end()) + // Skip division on vector types, only optimize integer instructions + if (!J->getType()->isIntegerTy()) + continue; + + // Get bitwidth of div/rem instruction + IntegerType *T = cast<IntegerType>(J->getType()); + int bitwidth = T->getBitWidth(); + + // Continue if bitwidth is not bypassed + DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); + if (BI == BypassWidths.end()) continue; - IntegerType *BypassType = cast<IntegerType>(BT->second); - MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, + // Get type for div/rem instruction with bypass bitwidth + IntegerType *BT = IntegerType::get(J->getContext(), BI->second); + + MadeChange |= reuseOrInsertFastDiv(F, I, J, BT, UseDivOp, UseSignedOp, DivCache); } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 215a16f..b71628b 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMTransformUtils - AddrModeMatcher.cpp BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp @@ -11,6 +10,7 @@ add_llvm_library(LLVMTransformUtils DemoteRegToStack.cpp InlineFunction.cpp InstructionNamer.cpp + IntegerDivision.cpp LCSSA.cpp Local.cpp LoopSimplify.cpp @@ -20,12 +20,14 @@ add_llvm_library(LLVMTransformUtils LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + MetaRenamer.cpp ModuleUtils.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp SimplifyIndVar.cpp SimplifyInstructions.cpp + SimplifyLibCalls.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 99237b8..ccc3eae 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -14,22 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Constants.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/ADT/SmallVector.h" #include <map> using namespace llvm; @@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttributeSet::ReturnIndex, + OldFunc->getAttributes() .getRetAttributes())); NewFunc->setAttributes(NewFunc->getAttributes() - .addAttr(~0, OldFunc->getAttributes() + .addAttr(NewFunc->getContext(), + AttributeSet::FunctionIndex, + OldFunc->getAttributes() .getFnAttributes())); } @@ -202,14 +206,14 @@ namespace { bool ModuleLevelChanges; const char *NameSuffix; ClonedCodeInfo *CodeInfo; - const TargetData *TD; + const DataLayout *TD; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, const char *nameSuffix, ClonedCodeInfo *codeInfo, - const TargetData *td) + const DataLayout *td) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { @@ -365,7 +369,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, - const TargetData *TD, + const DataLayout *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 1dac6b5..64df089 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Constant.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); - // Copy all of the dependent libraries over. - for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) - New->addLibrary(*I); - // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9b09915..8fa412a 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CmpInstAnalysis.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" using namespace llvm; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index c545cd6..3a21528 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,25 +14,25 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <set> using namespace llvm; @@ -346,7 +346,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, header->getName(), M); // If the old function is no-throw, so is the new one. if (oldFunction->doesNotThrow()) - newFunction->setDoesNotThrow(true); + newFunction->setDoesNotThrow(); newFunction->getBasicBlockList().push_back(newRootNode); diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 99b5830..d5c41f5 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" using namespace llvm; /// DemoteRegToStack - This function takes a virtual register computed by an @@ -124,7 +124,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { } // Insert a load in place of the PHI and replace all uses. - Value *V = new LoadInst(Slot, P->getName()+".reload", P); + BasicBlock::iterator InsertPt = P; + + for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt) + /* empty */; // Don't insert before PHI nodes or landingpad instrs. + + Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt); P->replaceAllUsesWith(V); // Delete PHI. diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 89e89e7..0d2598a 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,21 +13,21 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" -#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -357,7 +357,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Type *VoidPtrTy = Type::getInt8PtrTy(Context); - // Create the alloca. If we have TargetData, use nice alignment. + // Create the alloca. If we have DataLayout, use nice alignment. unsigned Align = 1; if (IFI.TD) Align = IFI.TD->getPrefTypeAlignment(AggTy); @@ -668,10 +668,29 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (hasLifetimeMarkers(AI)) continue; - builder.CreateLifetimeStart(AI); + // Try to determine the size of the allocation. + ConstantInt *AllocaSize = 0; + if (ConstantInt *AIArraySize = + dyn_cast<ConstantInt>(AI->getArraySize())) { + if (IFI.TD) { + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + } + + builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); - builder.CreateLifetimeEnd(AI); + builder.CreateLifetimeEnd(AI, AllocaSize); } } } diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 45c15de..a020bc7 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -15,9 +15,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Type.h" using namespace llvm; namespace { diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp new file mode 100644 index 0000000..5187d7c --- /dev/null +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -0,0 +1,420 @@ +//===-- IntegerDivision.cpp - Expand integer division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of 32bit scalar integer division for +// targets that don't have native support. It's largely derived from +// compiler-rt's implementation of __udivsi3, but hand-tuned to reduce the +// amount of control flow +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "integer-division" +#include "llvm/Transforms/Utils/IntegerDivision.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; + +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + +/// Generate code to divide two signed integers. Returns the quotient, rounded +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. +static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Implementation taken from compiler-rt's __divsi3 + + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %tmp = ashr i32 %dividend, 31 + // ; %tmp1 = ashr i32 %divisor, 31 + // ; %tmp2 = xor i32 %tmp, %dividend + // ; %u_dvnd = sub nsw i32 %tmp2, %tmp + // ; %tmp3 = xor i32 %tmp1, %divisor + // ; %u_dvsr = sub nsw i32 %tmp3, %tmp1 + // ; %q_sgn = xor i32 %tmp1, %tmp + // ; %q_mag = udiv i32 %u_dvnd, %u_dvsr + // ; %tmp4 = xor i32 %q_mag, %q_sgn + // ; %q = sub i32 %tmp4, %q_sgn + Value *Tmp = Builder.CreateAShr(Dividend, ThirtyOne); + Value *Tmp1 = Builder.CreateAShr(Divisor, ThirtyOne); + Value *Tmp2 = Builder.CreateXor(Tmp, Dividend); + Value *U_Dvnd = Builder.CreateSub(Tmp2, Tmp); + Value *Tmp3 = Builder.CreateXor(Tmp1, Divisor); + Value *U_Dvsr = Builder.CreateSub(Tmp3, Tmp1); + Value *Q_Sgn = Builder.CreateXor(Tmp1, Tmp); + Value *Q_Mag = Builder.CreateUDiv(U_Dvnd, U_Dvsr); + Value *Tmp4 = Builder.CreateXor(Q_Mag, Q_Sgn); + Value *Q = Builder.CreateSub(Tmp4, Q_Sgn); + + if (Instruction *UDiv = dyn_cast<Instruction>(Q_Mag)) + Builder.SetInsertPoint(UDiv); + + return Q; +} + +/// Generates code to divide two unsigned scalar 32-bit integers. Returns the +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. +static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // The basic algorithm can be found in the compiler-rt project's + // implementation of __udivsi3.c. Here, we do a lower-level IR based approach + // that's been hand-tuned to lessen the amount of control flow involved. + + // Some helper values + IntegerType *I32Ty = Builder.getInt32Ty(); + + ConstantInt *Zero = Builder.getInt32(0); + ConstantInt *One = Builder.getInt32(1); + ConstantInt *ThirtyOne = Builder.getInt32(31); + ConstantInt *NegOne = ConstantInt::getSigned(I32Ty, -1); + ConstantInt *True = Builder.getTrue(); + + BasicBlock *IBB = Builder.GetInsertBlock(); + Function *F = IBB->getParent(); + Function *CTLZi32 = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, + I32Ty); + + // Our CFG is going to look like: + // +---------------------+ + // | special-cases | + // | ... | + // +---------------------+ + // | | + // | +----------+ + // | | bb1 | + // | | ... | + // | +----------+ + // | | | + // | | +------------+ + // | | | preheader | + // | | | ... | + // | | +------------+ + // | | | + // | | | +---+ + // | | | | | + // | | +------------+ | + // | | | do-while | | + // | | | ... | | + // | | +------------+ | + // | | | | | + // | +-----------+ +---+ + // | | loop-exit | + // | | ... | + // | +-----------+ + // | | + // +-------+ + // | ... | + // | end | + // +-------+ + BasicBlock *SpecialCases = Builder.GetInsertBlock(); + SpecialCases->setName(Twine(SpecialCases->getName(), "_udiv-special-cases")); + BasicBlock *End = SpecialCases->splitBasicBlock(Builder.GetInsertPoint(), + "udiv-end"); + BasicBlock *LoopExit = BasicBlock::Create(Builder.getContext(), + "udiv-loop-exit", F, End); + BasicBlock *DoWhile = BasicBlock::Create(Builder.getContext(), + "udiv-do-while", F, End); + BasicBlock *Preheader = BasicBlock::Create(Builder.getContext(), + "udiv-preheader", F, End); + BasicBlock *BB1 = BasicBlock::Create(Builder.getContext(), + "udiv-bb1", F, End); + + // We'll be overwriting the terminator to insert our extra blocks + SpecialCases->getTerminator()->eraseFromParent(); + + // First off, check for special cases: dividend or divisor is zero, divisor + // is greater than dividend, and divisor is 1. + // ; special-cases: + // ; %ret0_1 = icmp eq i32 %divisor, 0 + // ; %ret0_2 = icmp eq i32 %dividend, 0 + // ; %ret0_3 = or i1 %ret0_1, %ret0_2 + // ; %tmp0 = tail call i32 @llvm.ctlz.i32(i32 %divisor, i1 true) + // ; %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %dividend, i1 true) + // ; %sr = sub nsw i32 %tmp0, %tmp1 + // ; %ret0_4 = icmp ugt i32 %sr, 31 + // ; %ret0 = or i1 %ret0_3, %ret0_4 + // ; %retDividend = icmp eq i32 %sr, 31 + // ; %retVal = select i1 %ret0, i32 0, i32 %dividend + // ; %earlyRet = or i1 %ret0, %retDividend + // ; br i1 %earlyRet, label %end, label %bb1 + Builder.SetInsertPoint(SpecialCases); + Value *Ret0_1 = Builder.CreateICmpEQ(Divisor, Zero); + Value *Ret0_2 = Builder.CreateICmpEQ(Dividend, Zero); + Value *Ret0_3 = Builder.CreateOr(Ret0_1, Ret0_2); + Value *Tmp0 = Builder.CreateCall2(CTLZi32, Divisor, True); + Value *Tmp1 = Builder.CreateCall2(CTLZi32, Dividend, True); + Value *SR = Builder.CreateSub(Tmp0, Tmp1); + Value *Ret0_4 = Builder.CreateICmpUGT(SR, ThirtyOne); + Value *Ret0 = Builder.CreateOr(Ret0_3, Ret0_4); + Value *RetDividend = Builder.CreateICmpEQ(SR, ThirtyOne); + Value *RetVal = Builder.CreateSelect(Ret0, Zero, Dividend); + Value *EarlyRet = Builder.CreateOr(Ret0, RetDividend); + Builder.CreateCondBr(EarlyRet, End, BB1); + + // ; bb1: ; preds = %special-cases + // ; %sr_1 = add i32 %sr, 1 + // ; %tmp2 = sub i32 31, %sr + // ; %q = shl i32 %dividend, %tmp2 + // ; %skipLoop = icmp eq i32 %sr_1, 0 + // ; br i1 %skipLoop, label %loop-exit, label %preheader + Builder.SetInsertPoint(BB1); + Value *SR_1 = Builder.CreateAdd(SR, One); + Value *Tmp2 = Builder.CreateSub(ThirtyOne, SR); + Value *Q = Builder.CreateShl(Dividend, Tmp2); + Value *SkipLoop = Builder.CreateICmpEQ(SR_1, Zero); + Builder.CreateCondBr(SkipLoop, LoopExit, Preheader); + + // ; preheader: ; preds = %bb1 + // ; %tmp3 = lshr i32 %dividend, %sr_1 + // ; %tmp4 = add i32 %divisor, -1 + // ; br label %do-while + Builder.SetInsertPoint(Preheader); + Value *Tmp3 = Builder.CreateLShr(Dividend, SR_1); + Value *Tmp4 = Builder.CreateAdd(Divisor, NegOne); + Builder.CreateBr(DoWhile); + + // ; do-while: ; preds = %do-while, %preheader + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + // ; %tmp5 = shl i32 %r_1, 1 + // ; %tmp6 = lshr i32 %q_2, 31 + // ; %tmp7 = or i32 %tmp5, %tmp6 + // ; %tmp8 = shl i32 %q_2, 1 + // ; %q_1 = or i32 %carry_1, %tmp8 + // ; %tmp9 = sub i32 %tmp4, %tmp7 + // ; %tmp10 = ashr i32 %tmp9, 31 + // ; %carry = and i32 %tmp10, 1 + // ; %tmp11 = and i32 %tmp10, %divisor + // ; %r = sub i32 %tmp7, %tmp11 + // ; %sr_2 = add i32 %sr_3, -1 + // ; %tmp12 = icmp eq i32 %sr_2, 0 + // ; br i1 %tmp12, label %loop-exit, label %do-while + Builder.SetInsertPoint(DoWhile); + PHINode *Carry_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *SR_3 = Builder.CreatePHI(I32Ty, 2); + PHINode *R_1 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_2 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp5 = Builder.CreateShl(R_1, One); + Value *Tmp6 = Builder.CreateLShr(Q_2, ThirtyOne); + Value *Tmp7 = Builder.CreateOr(Tmp5, Tmp6); + Value *Tmp8 = Builder.CreateShl(Q_2, One); + Value *Q_1 = Builder.CreateOr(Carry_1, Tmp8); + Value *Tmp9 = Builder.CreateSub(Tmp4, Tmp7); + Value *Tmp10 = Builder.CreateAShr(Tmp9, 31); + Value *Carry = Builder.CreateAnd(Tmp10, One); + Value *Tmp11 = Builder.CreateAnd(Tmp10, Divisor); + Value *R = Builder.CreateSub(Tmp7, Tmp11); + Value *SR_2 = Builder.CreateAdd(SR_3, NegOne); + Value *Tmp12 = Builder.CreateICmpEQ(SR_2, Zero); + Builder.CreateCondBr(Tmp12, LoopExit, DoWhile); + + // ; loop-exit: ; preds = %do-while, %bb1 + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + // ; %tmp13 = shl i32 %q_3, 1 + // ; %q_4 = or i32 %carry_2, %tmp13 + // ; br label %end + Builder.SetInsertPoint(LoopExit); + PHINode *Carry_2 = Builder.CreatePHI(I32Ty, 2); + PHINode *Q_3 = Builder.CreatePHI(I32Ty, 2); + Value *Tmp13 = Builder.CreateShl(Q_3, One); + Value *Q_4 = Builder.CreateOr(Carry_2, Tmp13); + Builder.CreateBr(End); + + // ; end: ; preds = %loop-exit, %special-cases + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + // ; ret i32 %q_5 + Builder.SetInsertPoint(End, End->begin()); + PHINode *Q_5 = Builder.CreatePHI(I32Ty, 2); + + // Populate the Phis, since all values have now been created. Our Phis were: + // ; %carry_1 = phi i32 [ 0, %preheader ], [ %carry, %do-while ] + Carry_1->addIncoming(Zero, Preheader); + Carry_1->addIncoming(Carry, DoWhile); + // ; %sr_3 = phi i32 [ %sr_1, %preheader ], [ %sr_2, %do-while ] + SR_3->addIncoming(SR_1, Preheader); + SR_3->addIncoming(SR_2, DoWhile); + // ; %r_1 = phi i32 [ %tmp3, %preheader ], [ %r, %do-while ] + R_1->addIncoming(Tmp3, Preheader); + R_1->addIncoming(R, DoWhile); + // ; %q_2 = phi i32 [ %q, %preheader ], [ %q_1, %do-while ] + Q_2->addIncoming(Q, Preheader); + Q_2->addIncoming(Q_1, DoWhile); + // ; %carry_2 = phi i32 [ 0, %bb1 ], [ %carry, %do-while ] + Carry_2->addIncoming(Zero, BB1); + Carry_2->addIncoming(Carry, DoWhile); + // ; %q_3 = phi i32 [ %q, %bb1 ], [ %q_1, %do-while ] + Q_3->addIncoming(Q, BB1); + Q_3->addIncoming(Q_1, DoWhile); + // ; %q_5 = phi i32 [ %q_4, %loop-exit ], [ %retVal, %special-cases ] + Q_5->addIncoming(Q_4, LoopExit); + Q_5->addIncoming(RetVal, SpecialCases); + + return Q_5; +} + +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. +bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + +/// Generate code to divide two integers, replacing Div with the generated +/// code. This currently generates code similarly to compiler-rt's +/// implementations, but future work includes generating more specialized code +/// when more information about the operands are known. Currently only +/// implements 32bit scalar division, but future work is removing this +/// limitation. +/// +/// @brief Replace Div with generated code. +bool llvm::expandDivision(BinaryOperator *Div) { + assert((Div->getOpcode() == Instruction::SDiv || + Div->getOpcode() == Instruction::UDiv) && + "Trying to expand division from a non-division function"); + + IRBuilder<> Builder(Div); + + if (Div->getType()->isVectorTy()) + llvm_unreachable("Div over vectors not supported"); + + // First prepare the sign if it's a signed division + if (Div->getOpcode() == Instruction::SDiv) { + // Lower the code to unsigned division, and reset Div to point to the udiv. + Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::UDiv) + return true; + + Div = BO; + } + + // Insert the unsigned division code + Value *Quotient = generateUnsignedDivisionCode(Div->getOperand(0), + Div->getOperand(1), + Builder); + Div->replaceAllUsesWith(Quotient); + Div->dropAllReferences(); + Div->eraseFromParent(); + + return true; +} diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b654111..2d1b166 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -29,17 +29,17 @@ #define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Pass.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" #include "llvm/Support/PredIteratorCache.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; STATISTIC(NumLCSSA, "Number of live out of a loop variables"); @@ -53,6 +53,8 @@ namespace { // Cached analysis information for the current function. DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; std::vector<BasicBlock*> LoopBlocks; PredIteratorCache PredCache; Loop *L; @@ -117,6 +119,8 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { L = TheLoop; DT = &getAnalysis<DominatorTree>(); + LI = &getAnalysis<LoopInfo>(); + SE = getAnalysisIfAvailable<ScalarEvolution>(); // Get the set of exiting blocks. SmallVector<BasicBlock*, 8> ExitBlocks; @@ -156,6 +160,12 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { MadeChange |= ProcessInstruction(I, ExitBlocks); } } + + // If we modified the code, remove any caches about the loop from SCEV to + // avoid dangling entries. + // FIXME: This is a big hammer, can we clear the cache more selectively? + if (SE && MadeChange) + SE->forgetLoop(L); assert(L->isLCSSAForm(*DT)); PredCache.clear(); @@ -245,7 +255,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); } - + // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { @@ -260,6 +270,9 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { + // Tell the VHs that the uses changed. This updates SCEV's caches. + if (UsesToRewrite[i]->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); UsesToRewrite[i]->set(UserBB->begin()); continue; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 0601433..a54ee08 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -13,32 +13,34 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Constants.h" -#include "llvm/DIBuilder.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Intrinsics.h" -#include "llvm/Metadata.h" -#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -122,6 +124,27 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Check to see if this branch is going to the same place as the default // dest. If so, eliminate it as an explicit compare. if (i.getCaseSuccessor() == DefaultDest) { + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + // MD should have 2 + NumCases operands. + if (MD && MD->getNumOperands() == 2 + SI->getNumCases()) { + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + // Merge weight of this case to the default weight. + unsigned idx = i.getCaseIndex(); + Weights[0] += Weights[idx+1]; + // Remove weight for this case. + std::swap(Weights[idx+1], Weights.back()); + Weights.pop_back(); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(Weights)); + } // Remove this entry. DefaultDest->removePredecessor(SI->getParent()); SI->removeCase(i); @@ -178,8 +201,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, "cond"); // Insert the new branch. - Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), - SI->getDefaultDest()); + BranchInst *NewBr = Builder.CreateCondBr(Cond, + FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + if (MD && MD->getNumOperands() == 3) { + ConstantInt *SICase = dyn_cast<ConstantInt>(MD->getOperand(2)); + ConstantInt *SIDef = dyn_cast<ConstantInt>(MD->getOperand(1)); + assert(SICase && SIDef); + // The TrueWeight should be the weight for the single case of SI. + NewBr->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). + createBranchWeights(SICase->getValue().getZExtValue(), + SIDef->getValue().getZExtValue())); + } // Delete the old switch. SI->eraseFromParent(); @@ -363,7 +398,7 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const DataLayout *TD, const TargetLibraryInfo *TLI) { bool MadeChange = false; @@ -411,7 +446,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, /// .. and delete the predecessor corresponding to the '1', this will attempt to /// recursively fold the and to 0. void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - TargetData *TD) { + DataLayout *TD) { // This only adjusts blocks with PHI nodes. if (!isa<PHINode>(BB->begin())) return; @@ -570,7 +605,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // possible to handle such cases, but difficult: it requires checking whether // BB dominates Succ, which is non-trivial to calculate in the case where // Succ has multiple predecessors. Also, it requires checking whether - // constructing the necessary self-referential PHI node doesn't intoduce any + // constructing the necessary self-referential PHI node doesn't introduce any // conflicts; this isn't too difficult, but the previous code for doing this // was incorrect. // @@ -726,7 +761,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// their preferred alignment from the beginning. /// static unsigned enforceKnownAlignment(Value *V, unsigned Align, - unsigned PrefAlign, const TargetData *TD) { + unsigned PrefAlign, const DataLayout *TD) { V = V->stripPointerCasts(); if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { @@ -769,7 +804,7 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, /// and it is more than the alignment of the ultimate object, see if we can /// increase the alignment of the ultimate object, making this check succeed. unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, - const TargetData *TD) { + const DataLayout *TD) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; @@ -894,3 +929,78 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return 0; } + +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); + if (!DDI) + return false; + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + // Create a copy of the original DIDescriptor for user variable, appending + // "deref" operation to a list of address elements, as new llvm.dbg.declare + // will take a value storing address of the memory for variable, not + // alloca itself. + Type *Int64Ty = Type::getInt64Ty(AI->getContext()); + SmallVector<Value*, 4> NewDIVarAddress; + if (DIVar.hasComplexAddress()) { + for (unsigned i = 0, n = DIVar.getNumAddrElements(); i < n; ++i) { + NewDIVarAddress.push_back( + ConstantInt::get(Int64Ty, DIVar.getAddrElement(i))); + } + } + NewDIVarAddress.push_back(ConstantInt::get(Int64Ty, DIBuilder::OpDeref)); + DIVariable NewDIVar = Builder.createComplexVariable( + DIVar.getTag(), DIVar.getContext(), DIVar.getName(), + DIVar.getFile(), DIVar.getLineNumber(), DIVar.getType(), + NewDIVarAddress, DIVar.getArgNumber()); + + // Insert llvm.dbg.declare in the same basic block as the original alloca, + // and remove old llvm.dbg.declare. + BasicBlock *BB = AI->getParent(); + Builder.insertDeclare(NewAllocaAddress, NewDIVar, BB); + DDI->eraseFromParent(); + return true; +} + +bool llvm::removeUnreachableBlocks(Function &F) { + SmallPtrSet<BasicBlock*, 16> Reachable; + SmallVector<BasicBlock*, 128> Worklist; + Worklist.push_back(&F.getEntryBlock()); + Reachable.insert(&F.getEntryBlock()); + do { + BasicBlock *BB = Worklist.pop_back_val(); + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); + } while (!Worklist.empty()); + + if (Reachable.size() == F.size()) + return false; + + assert(Reachable.size() < F.size()); + for (Function::iterator I = llvm::next(F.begin()), E = F.end(); I != E; ++I) { + if (Reachable.count(I)) + continue; + + // Remove the block as predecessor of all its reachable successors. + // Unreachable successors don't matter as they'll soon be removed, too. + for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) + if (Reachable.count(*SI)) + (*SI)->removePredecessor(I); + + // Zap all instructions in this basic block. + while (!I->empty()) { + Instruction &Inst = I->back(); + if (!Inst.use_empty()) + Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); + I->getInstList().pop_back(); + } + + --I; + llvm::next(I)->eraseFromParent(); + } + return true; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 0bc185d..37819cc 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -39,25 +39,26 @@ #define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); @@ -89,6 +90,7 @@ namespace { AU.addPreserved<AliasAnalysis>(); AU.addPreserved<ScalarEvolution>(); + AU.addPreserved<DependenceAnalysis>(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -194,6 +196,11 @@ ReprocessLoop: BI->setCondition(ConstantInt::get(Cond->getType(), !L->contains(BI->getSuccessor(0)))); + + // This may make the loop analyzable, force SCEV recomputation. + if (SE) + SE->forgetLoop(L); + Changed = true; } } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 2023750..cb581b3 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -18,12 +18,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 67e17f4..d801d5f 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -23,12 +23,12 @@ #define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include "llvm/BasicBlock.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 02bdcda..4aee8ff 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -12,17 +12,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lower-expect-intrinsic" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include <vector> diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 9305554..9ec84d7 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -36,19 +36,19 @@ #define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include <csetjmp> #include <set> using namespace llvm; diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1547439..955b853 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -14,16 +14,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include <algorithm> using namespace llvm; diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index f4ca81a..61b3965 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Instructions.h" -#include "llvm/Function.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumPromoted, "Number of alloca's promoted"); diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp new file mode 100644 index 0000000..d519fb7 --- /dev/null +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -0,0 +1,131 @@ +//===- MetaRenamer.cpp - Rename everything with metasyntatic names --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + + // This PRNG is from the ISO C spec. It is intentionally simple and + // unsuitable for cryptographic use. We're just looking for enough + // variety to surprise and delight users. + struct PRNG { + unsigned long next; + + void srand(unsigned int seed) { + next = seed; + } + + int rand() { + next = next * 1103515245 + 12345; + return (unsigned int)(next / 65536) % 32768; + } + }; + + struct MetaRenamer : public ModulePass { + static char ID; // Pass identification, replacement for typeid + MetaRenamer() : ModulePass(ID) { + initializeMetaRenamerPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnModule(Module &M) { + static const char *metaNames[] = { + // See http://en.wikipedia.org/wiki/Metasyntactic_variable + "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge", + "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam" + }; + + // Seed our PRNG with simple additive sum of ModuleID. We're looking to + // simply avoid always having the same function names, and we need to + // remain deterministic. + unsigned int randSeed = 0; + for (std::string::const_iterator I = M.getModuleIdentifier().begin(), + E = M.getModuleIdentifier().end(); I != E; ++I) + randSeed += *I; + + PRNG prng; + prng.srand(randSeed); + + // Rename all aliases + for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); + AI != AE; ++AI) + AI->setName("alias"); + + // Rename all global variables + for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); + GI != GE; ++GI) + GI->setName("global"); + + // Rename all struct types + TypeFinder StructTypes; + StructTypes.run(M, true); + for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { + StructType *STy = StructTypes[i]; + if (STy->isLiteral() || STy->getName().empty()) continue; + + SmallString<128> NameStorage; + STy->setName((Twine("struct.") + metaNames[prng.rand() % + array_lengthof(metaNames)]).toStringRef(NameStorage)); + } + + // Rename all functions + for (Module::iterator FI = M.begin(), FE = M.end(); + FI != FE; ++FI) { + FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); + runOnFunction(*FI); + } + return true; + } + + bool runOnFunction(Function &F) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); + AI != AE; ++AI) + if (!AI->getType()->isVoidTy()) + AI->setName("arg"); + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + BB->setName("bb"); + + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->getType()->isVoidTy()) + I->setName("tmp"); + } + return true; + } + }; +} + +char MetaRenamer::ID = 0; +INITIALIZE_PASS(MetaRenamer, "metarenamer", + "Assign new names to everything", false, false) +//===----------------------------------------------------------------------===// +// +// MetaRenamer - Rename everything with metasyntactic names. +// +ModulePass *llvm::createMetaRenamerPass() { + return new MetaRenamer(); +} diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index dbcf3b2..d090b48 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Module.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" using namespace llvm; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index dd5e20e..de335ec 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -27,26 +27,26 @@ #define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/DIBuilder.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" -#include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <queue> using namespace llvm; @@ -212,9 +212,13 @@ namespace { /// DenseMap<AllocaInst*, unsigned> AllocaLookup; - /// NewPhiNodes - The PhiNodes we're adding. + /// NewPhiNodes - The PhiNodes we're adding. That map is used to simplify + /// some Phi nodes as we iterate over it, so it should have deterministic + /// iterators. We could use a MapVector, but since we already maintain a + /// map from BasicBlock* to a stable numbering (BBNumbers), the DenseMap is + /// more efficient (also supports removal). /// - DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; + DenseMap<std::pair<unsigned, unsigned>, PHINode*> NewPhiNodes; /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas /// it corresponds to. @@ -588,7 +592,11 @@ void PromoteMem2Reg::run() { while (EliminatedAPHI) { EliminatedAPHI = false; - for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + // Iterating over NewPhiNodes is deterministic, so it is safe to try to + // simplify and RAUW them as we go. If it was not, we could add uses to + // the values we replace with in a non deterministic order, thus creating + // non deterministic def->use chains. + for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { PHINode *PN = I->second; @@ -612,7 +620,7 @@ void PromoteMem2Reg::run() { // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. // - for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = + for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { // We want to do this once per basic block. As such, only process a block // when we find the PHI that is the first entry in the block. @@ -992,7 +1000,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, unsigned &Version) { // Look up the basic-block in question. - PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; + PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)]; // If the BB already has a phi node added for the i'th alloca then we're done! if (PN) return false; diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 72d4199..9d90fbe 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CFG.h" @@ -25,7 +26,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 32d7fa1..f10c35f 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -13,18 +13,6 @@ #define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" -#include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -32,18 +20,31 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/NoFolder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> -#include <set> #include <map> +#include <set> using namespace llvm; static cl::opt<unsigned> @@ -54,8 +55,14 @@ static cl::opt<bool> DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +static cl::opt<bool> +SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), + cl::desc("Sink common instructions down to the end block")); + +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); +STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -70,10 +77,13 @@ namespace { // Comparing pointers is ok as we only rely on the order for uniquing. return Value < RHS.Value; } + + bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } }; class SimplifyCFGOpt { - const TargetData *const TD; + const TargetTransformInfo &TTI; + const DataLayout *const TD; Value *isValueEqualityComparison(TerminatorInst *TI); BasicBlock *GetValueEqualityComparisonCases(TerminatorInst *TI, @@ -93,7 +103,8 @@ class SimplifyCFGOpt { bool SimplifyCondBranch(BranchInst *BI, IRBuilder <>&Builder); public: - explicit SimplifyCFGOpt(const TargetData *td) : TD(td) {} + SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout *TD) + : TTI(TTI), TD(TD) {} bool run(BasicBlock *BB); }; } @@ -376,7 +387,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, /// GetConstantInt - Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. -static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { +static ConstantInt *GetConstantInt(Value *V, const DataLayout *TD) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) @@ -384,7 +395,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. - IntegerType *PtrTy = TD->getIntPtrType(V->getContext()); + IntegerType *PtrTy = cast<IntegerType>(TD->getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa<ConstantPointerNull>(V)) @@ -410,7 +421,7 @@ static ConstantInt *GetConstantInt(Value *V, const TargetData *TD) { /// Values vector. static Value * GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, - const TargetData *TD, bool isEQ, unsigned &UsedICmps) { + const DataLayout *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; @@ -558,11 +569,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, std::vector<ValueEqualityComparisonCase> &Cases) { - for (unsigned i = 0, e = Cases.size(); i != e; ++i) - if (Cases[i].Dest == BB) { - Cases.erase(Cases.begin()+i); - --i; --e; - } + Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end()); } /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as @@ -667,13 +674,32 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); + // Collect branch weights into a vector. + SmallVector<uint32_t, 8> Weights; + MDNode* MD = SI->getMetadata(LLVMContext::MD_prof); + bool HasWeight = MD && (MD->getNumOperands() == 2 + SI->getNumCases()); + if (HasWeight) + for (unsigned MD_i = 1, MD_e = MD->getNumOperands(); MD_i < MD_e; + ++MD_i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(MD_i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; if (DeadCases.count(i.getCaseValue())) { + if (HasWeight) { + std::swap(Weights[i.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } } + if (HasWeight && Weights.size() >= 2) + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(Weights)); DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -752,38 +778,27 @@ static inline bool HasBranchWeights(const Instruction* I) { return false; } -/// Tries to get a branch weight for the given instruction, returns NULL if it -/// can't. Pos starts at 0. -static ConstantInt* GetWeight(Instruction* I, int Pos) { - MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); - if (ProfMD && ProfMD->getOperand(0)) { - if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) { - if (MDS->getString().equals("branch_weights")) { - assert(ProfMD->getNumOperands() >= 3); - return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos)); - } - } - } - - return 0; -} - -/// Scale the given weights based on the successor TI's metadata. Scaling is -/// done by multiplying every weight by the sum of the successor's weights. -static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) { - // Sum the successor's weights - assert(HasBranchWeights(STI)); - unsigned Scale = 0; - MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); - for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { - ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i)); +/// Get Weights of a given TerminatorInst, the default weight is at the front +/// of the vector. If TI is a conditional eq, we need to swap the branch-weight +/// metadata. +static void GetBranchWeights(TerminatorInst *TI, + SmallVectorImpl<uint64_t> &Weights) { + MDNode* MD = TI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); assert(CI); - Scale += CI->getValue().getZExtValue(); + Weights.push_back(CI->getValue().getZExtValue()); } - // Skip default, as it's replaced during the folding - for (unsigned i = 1; i < Weights.size(); ++i) { - Weights[i] *= Scale; + // If TI is a conditional eq, the default case is the false case, + // and the corresponding branch-weight data is at index 2. We swap the + // default weight to be the first entry. + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + std::swap(Weights.front(), Weights.back()); } } @@ -838,52 +853,28 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Update the branch weight metadata along the way SmallVector<uint64_t, 8> Weights; - uint64_t PredDefaultWeight = 0; bool PredHasWeights = HasBranchWeights(PTI); bool SuccHasWeights = HasBranchWeights(TI); if (PredHasWeights) { - MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); - assert(MD); - for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { - ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); - assert(CI); - Weights.push_back(CI->getValue().getZExtValue()); - } - - // If the predecessor is a conditional eq, then swap the default weight - // to be the first entry. - if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) { - assert(Weights.size() == 2); - ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); - - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { - std::swap(Weights.front(), Weights.back()); - } - } - - PredDefaultWeight = Weights.front(); - } else if (SuccHasWeights) { + GetBranchWeights(PTI, Weights); + // branch-weight metadata is inconsistent here. + if (Weights.size() != 1 + PredCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (SuccHasWeights) // If there are no predecessor weights but there are successor weights, // populate Weights with 1, which will later be scaled to the sum of // successor's weights Weights.assign(1 + PredCases.size(), 1); - PredDefaultWeight = 1; - } - uint64_t SuccDefaultWeight = 0; + SmallVector<uint64_t, 8> SuccWeights; if (SuccHasWeights) { - int Index = 0; - if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { - ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition()); - assert(ICI); - - if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Index = 1; - } - - SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); - } + GetBranchWeights(TI, SuccWeights); + // branch-weight metadata is inconsistent here. + if (SuccWeights.size() != 1 + BBCases.size()) + PredHasWeights = SuccHasWeights = false; + } else if (PredHasWeights) + SuccWeights.assign(1 + BBCases.size(), 1); if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI @@ -896,7 +887,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); - if (PredHasWeights) { + if (PredHasWeights || SuccHasWeights) { + // Increase weight for the default case. + Weights[0] += Weights[i+1]; std::swap(Weights[i+1], Weights.back()); Weights.pop_back(); } @@ -912,40 +905,46 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, NewSuccessors.push_back(BBDefault); } - if (SuccHasWeights) { - ScaleWeights(TI, Weights); - Weights.front() *= SuccDefaultWeight; - } else if (PredHasWeights) { - Weights.front() /= (1 + BBCases.size()); - } - + unsigned CasesFromPred = Weights.size(); + uint64_t ValidTotalSuccWeight = 0; for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); - if (SuccHasWeights) { - Weights.push_back(PredDefaultWeight * - GetWeight(TI, i)->getValue().getZExtValue()); - } else if (PredHasWeights) { - // Split the old default's weight amongst the children - Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + if (SuccHasWeights || PredHasWeights) { + // The default weight is at index 0, so weight for the ith case + // should be at index i+1. Scale the cases from successor by + // PredDefaultWeight (Weights[0]). + Weights.push_back(Weights[0] * SuccWeights[i+1]); + ValidTotalSuccWeight += SuccWeights[i+1]; } } + if (SuccHasWeights || PredHasWeights) { + ValidTotalSuccWeight += SuccWeights[0]; + // Scale the cases from predecessor by ValidTotalSuccWeight. + for (unsigned i = 1; i < CasesFromPred; ++i) + Weights[i] *= ValidTotalSuccWeight; + // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). + Weights[0] *= SuccWeights[0]; + } } else { - // FIXME: preserve branch weight metadata, similarly to the 'then' - // above. For now, drop it. - PredHasWeights = false; - SuccHasWeights = false; - // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. std::set<ConstantInt*, ConstantIntOrdering> PTIHandled; + std::map<ConstantInt*, uint64_t> WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == BB) { PTIHandled.insert(PredCases[i].Value); + + if (PredHasWeights || SuccHasWeights) { + WeightsForHandled[PredCases[i].Value] = Weights[i+1]; + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; @@ -956,6 +955,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); PTIHandled.erase(BBCases[i].Value);// This constant is taken care of @@ -966,6 +967,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { + if (PredHasWeights || SuccHasWeights) + Weights.push_back(WeightsForHandled[*I]); PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); NewSuccessors.push_back(BBDefault); } @@ -980,7 +983,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CV = Builder.CreatePtrToInt(CV, TD->getIntPtrType(CV->getContext()), "magicptr"); } @@ -1160,6 +1163,175 @@ HoistTerminator: return true; } +/// SinkThenElseCodeToEnd - Given an unconditional branch that goes to BBEnd, +/// check whether BBEnd has only two predecessors and the other predecessor +/// ends with an unconditional branch. If it is true, sink any common code +/// in the two predecessors to BBEnd. +static bool SinkThenElseCodeToEnd(BranchInst *BI1) { + assert(BI1->isUnconditional()); + BasicBlock *BB1 = BI1->getParent(); + BasicBlock *BBEnd = BI1->getSuccessor(0); + + // Check that BBEnd has two predecessors and the other predecessor ends with + // an unconditional branch. + pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd); + BasicBlock *Pred0 = *PI++; + if (PI == PE) // Only one predecessor. + return false; + BasicBlock *Pred1 = *PI++; + if (PI != PE) // More than two predecessors. + return false; + BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0; + BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator()); + if (!BI2 || !BI2->isUnconditional()) + return false; + + // Gather the PHI nodes in BBEnd. + std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2; + Instruction *FirstNonPhiInBBEnd = 0; + for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); + I != E; ++I) { + if (PHINode *PN = dyn_cast<PHINode>(I)) { + Value *BB1V = PN->getIncomingValueForBlock(BB1); + Value *BB2V = PN->getIncomingValueForBlock(BB2); + MapValueFromBB1ToBB2[BB1V] = std::make_pair(BB2V, PN); + } else { + FirstNonPhiInBBEnd = &*I; + break; + } + } + if (!FirstNonPhiInBBEnd) + return false; + + + // This does very trivial matching, with limited scanning, to find identical + // instructions in the two blocks. We scan backward for obviously identical + // instructions in an identical order. + BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(), + RE1 = BB1->getInstList().rend(), RI2 = BB2->getInstList().rbegin(), + RE2 = BB2->getInstList().rend(); + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return false; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return false; + // Skip the unconditional branches. + ++RI1; + ++RI2; + + bool Changed = false; + while (RI1 != RE1 && RI2 != RE2) { + // Skip debug info. + while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1)) ++RI1; + if (RI1 == RE1) + return Changed; + while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2)) ++RI2; + if (RI2 == RE2) + return Changed; + + Instruction *I1 = &*RI1, *I2 = &*RI2; + // I1 and I2 should have a single use in the same PHI node, and they + // perform the same operation. + // Cannot move control-flow-involving, volatile loads, vaarg, etc. + if (isa<PHINode>(I1) || isa<PHINode>(I2) || + isa<TerminatorInst>(I1) || isa<TerminatorInst>(I2) || + isa<LandingPadInst>(I1) || isa<LandingPadInst>(I2) || + isa<AllocaInst>(I1) || isa<AllocaInst>(I2) || + I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || + I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || + !I1->hasOneUse() || !I2->hasOneUse() || + MapValueFromBB1ToBB2.find(I1) == MapValueFromBB1ToBB2.end() || + MapValueFromBB1ToBB2[I1].first != I2) + return Changed; + + // Check whether we should swap the operands of ICmpInst. + ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2); + bool SwapOpnds = false; + if (ICmp1 && ICmp2 && + ICmp1->getOperand(0) != ICmp2->getOperand(0) && + ICmp1->getOperand(1) != ICmp2->getOperand(1) && + (ICmp1->getOperand(0) == ICmp2->getOperand(1) || + ICmp1->getOperand(1) == ICmp2->getOperand(0))) { + ICmp2->swapOperands(); + SwapOpnds = true; + } + if (!I1->isSameOperationAs(I2)) { + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + + // The operands should be either the same or they need to be generated + // with a PHI node after sinking. We only handle the case where there is + // a single pair of different operands. + Value *DifferentOp1 = 0, *DifferentOp2 = 0; + unsigned Op1Idx = 0; + for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { + if (I1->getOperand(I) == I2->getOperand(I)) + continue; + // Early exit if we have more-than one pair of different operands or + // the different operand is already in MapValueFromBB1ToBB2. + // Early exit if we need a PHI node to replace a constant. + if (DifferentOp1 || + MapValueFromBB1ToBB2.find(I1->getOperand(I)) != + MapValueFromBB1ToBB2.end() || + isa<Constant>(I1->getOperand(I)) || + isa<Constant>(I2->getOperand(I))) { + // If we can't sink the instructions, undo the swapping. + if (SwapOpnds) + ICmp2->swapOperands(); + return Changed; + } + DifferentOp1 = I1->getOperand(I); + Op1Idx = I; + DifferentOp2 = I2->getOperand(I); + } + + // We insert the pair of different operands to MapValueFromBB1ToBB2 and + // remove (I1, I2) from MapValueFromBB1ToBB2. + if (DifferentOp1) { + PHINode *NewPN = PHINode::Create(DifferentOp1->getType(), 2, + DifferentOp1->getName() + ".sink", + BBEnd->begin()); + MapValueFromBB1ToBB2[DifferentOp1] = std::make_pair(DifferentOp2, NewPN); + // I1 should use NewPN instead of DifferentOp1. + I1->setOperand(Op1Idx, NewPN); + NewPN->addIncoming(DifferentOp1, BB1); + NewPN->addIncoming(DifferentOp2, BB2); + DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); + } + PHINode *OldPN = MapValueFromBB1ToBB2[I1].second; + MapValueFromBB1ToBB2.erase(I1); + + DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n";); + DEBUG(dbgs() << " " << *I2 << "\n";); + // We need to update RE1 and RE2 if we are going to sink the first + // instruction in the basic block down. + bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); + // Sink the instruction. + BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + if (!OldPN->use_empty()) + OldPN->replaceAllUsesWith(I1); + OldPN->eraseFromParent(); + + if (!I2->use_empty()) + I2->replaceAllUsesWith(I1); + I1->intersectOptionalDataWith(I2); + I2->eraseFromParent(); + + if (UpdateRE1) + RE1 = BB1->getInstList().rend(); + if (UpdateRE2) + RE2 = BB2->getInstList().rend(); + FirstNonPhiInBBEnd = I1; + NumSinkCommons++; + Changed = true; + } + return Changed; +} + /// SpeculativelyExecuteBB - Given a conditional branch that goes to BB1 /// and an BB2 and the only successor of BB1 is BB2, hoist simple code /// (for now, restricted to a single instruction that's side effect free) from @@ -1243,7 +1415,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { if (BB1V == BIParentV) continue; - // Check for saftey. + // Check for safety. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) { // An unfolded ConstantExpr could end up getting expanded into // Instructions. Don't speculate this and another instruction at @@ -1339,7 +1511,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { /// that is defined in the same block as the branch and if any PHI entries are /// constants, thread edges corresponding to that entry to be branches to their /// ultimate destination. -static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { +static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *TD) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast<PHINode>(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used @@ -1435,7 +1607,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { /// FoldTwoEntryPHINode - Given a BB that starts with the specified two-entry /// PHI node, see if we can eliminate it. -static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { +static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *TD) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which @@ -1662,7 +1834,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, /// parameters and return true, or returns false if no or invalid metadata was /// found. static bool ExtractBranchMetadata(BranchInst *BI, - APInt &ProbTrue, APInt &ProbFalse) { + uint64_t &ProbTrue, uint64_t &ProbFalse) { assert(BI->isConditional() && "Looking for probabilities on unconditional branch?"); MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof); @@ -1670,35 +1842,11 @@ static bool ExtractBranchMetadata(BranchInst *BI, ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1)); ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2)); if (!CITrue || !CIFalse) return false; - ProbTrue = CITrue->getValue(); - ProbFalse = CIFalse->getValue(); - assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 && - "Branch probability metadata must be 32-bit integers"); + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); return true; } -/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In -/// the event of overflow, logically-shifts all four inputs right until the -/// multiply fits. -static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D, - unsigned &BitsLost) { - BitsLost = 0; - bool Overflow = false; - APInt Result = A.umul_ov(B, Overflow); - if (Overflow) { - APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A); - do { - B = B.lshr(1); - ++BitsLost; - } while (B.ugt(MaxB)); - A = A.lshr(BitsLost); - C = C.lshr(BitsLost); - D = D.lshr(BitsLost); - Result = A * B; - } - return Result; -} - /// checkCSEInPredecessor - Return true if the given instruction is available /// in its predecessor block. If yes, the instruction will be removed. /// @@ -1824,7 +1972,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { continue; // Determine if the two branches share a common destination. - Instruction::BinaryOps Opc; + Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd; bool InvertPredCond = false; if (BI->isConditional()) { @@ -1923,14 +2071,53 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { New, "or.cond")); PBI->setCondition(NewCond); + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + SmallVector<uint64_t, 8> NewWeights; + if (PBI->getSuccessor(0) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, BB, FalseDest + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * TotalWeight for BI + + // TrueWeight for PBI * FalseWeight for BI. + // We assume that total weights of a BranchInst can fit into 32 bits. + // Therefore, we will not have overflow using 64-bit arithmetic. + NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); + } AddPredecessorToBlock(TrueDest, PredBlock, BB); PBI->setSuccessor(0, TrueDest); } if (PBI->getSuccessor(1) == BB) { + if (PredHasWeights && SuccHasWeights) { + // PBI: br i1 %x, TrueDest, BB + // BI: br i1 %y, TrueDest, FalseDest + //TrueWeight is TrueWeight for PBI * TotalWeight for BI + + // FalseWeight for PBI * TrueWeight for BI. + NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); + //FalseWeight is FalseWeight for PBI * FalseWeight for BI. + NewWeights.push_back(PredFalseWeight * SuccFalseWeight); + } AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + if (NewWeights.size() == 2) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } else + PBI->setMetadata(LLVMContext::MD_prof, NULL); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { @@ -1985,90 +2172,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // TODO: If BB is reachable from all paths through PredBlock, then we // could replace PBI's branch probabilities with BI's. - // Merge probability data into PredBlock's branch. - APInt A, B, C, D; - if (PBI->isConditional() && BI->isConditional() && - ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) { - // Given IR which does: - // bbA: - // br i1 %x, label %bbB, label %bbC - // bbB: - // br i1 %y, label %bbD, label %bbC - // Let's call the probability that we take the edge from %bbA to %bbB - // 'a', from %bbA to %bbC, 'b', from %bbB to %bbD 'c' and from %bbB to - // %bbC probability 'd'. - // - // We transform the IR into: - // bbA: - // br i1 %z, label %bbD, label %bbC - // where the probability of going to %bbD is (a*c) and going to bbC is - // (b+a*d). - // - // Probabilities aren't stored as ratios directly. Using branch weights, - // we get: - // (a*c)% = A*C, (b+(a*d))% = A*D+B*C+B*D. - - // In the event of overflow, we want to drop the LSB of the input - // probabilities. - unsigned BitsLost; - - // Ignore overflow result on ProbTrue. - APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost); - - APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - } - - APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - } - - APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost); - if (BitsLost) { - ProbTrue = ProbTrue.lshr(BitsLost*2); - Tmp1 = Tmp1.lshr(BitsLost*2); - Tmp2 = Tmp2.lshr(BitsLost*2); - } - - bool Overflow1 = false, Overflow2 = false; - APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1); - APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2); - - if (Overflow1 || Overflow2) { - ProbTrue = ProbTrue.lshr(1); - Tmp1 = Tmp1.lshr(1); - Tmp2 = Tmp2.lshr(1); - Tmp3 = Tmp3.lshr(1); - Tmp4 = Tmp2 + Tmp3; - ProbFalse = Tmp4 + Tmp1; - } - - // The sum of branch weights must fit in 32-bits. - if (ProbTrue.isNegative() && ProbFalse.isNegative()) { - ProbTrue = ProbTrue.lshr(1); - ProbFalse = ProbFalse.lshr(1); - } - - if (ProbTrue != ProbFalse) { - // Normalize the result. - APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse); - ProbTrue = ProbTrue.udiv(GCD); - ProbFalse = ProbFalse.udiv(GCD); - - MDBuilder MDB(BI->getContext()); - MDNode *N = MDB.createBranchWeights(ProbTrue.getZExtValue(), - ProbFalse.getZExtValue()); - PBI->setMetadata(LLVMContext::MD_prof, N); - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - } else { - PBI->setMetadata(LLVMContext::MD_prof, NULL); - } - // Copy any debug value intrinsics into the end of PredBlock. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) @@ -2223,6 +2326,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); + // Update branch weight for PBI. + uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; + bool PredHasWeights = ExtractBranchMetadata(PBI, PredTrueWeight, + PredFalseWeight); + bool SuccHasWeights = ExtractBranchMetadata(BI, SuccTrueWeight, + SuccFalseWeight); + if (PredHasWeights && SuccHasWeights) { + uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; + uint64_t PredOther = PBIOp ?PredTrueWeight : PredFalseWeight; + uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; + uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; + // The weight to CommonDest should be PredCommon * SuccTotal + + // PredOther * SuccCommon. + // The weight to OtherDest should be PredOther * SuccOther. + SmallVector<uint64_t, 2> NewWeights; + NewWeights.push_back(PredCommon * (SuccCommon + SuccOther) + + PredOther * SuccCommon); + NewWeights.push_back(PredOther * SuccOther); + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(NewWeights); + + SmallVector<uint32_t, 2> MDWeights(NewWeights.begin(),NewWeights.end()); + PBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BI->getContext()). + createBranchWeights(MDWeights)); + } + // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); @@ -2259,7 +2389,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, - BasicBlock *TrueBB, BasicBlock *FalseBB){ + BasicBlock *TrueBB, BasicBlock *FalseBB, + uint32_t TrueWeight, + uint32_t FalseWeight){ // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that @@ -2288,10 +2420,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); - else + else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. - Builder.CreateCondBr(Cond, TrueBB, FalseBB); + BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); + if (TrueWeight != FalseWeight) + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(OldTerm->getContext()). + createBranchWeights(TrueWeight, FalseWeight)); + } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this // terminator must be unreachable. @@ -2328,8 +2465,23 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); + // Get weight for TrueBB and FalseBB. + uint32_t TrueWeight = 0, FalseWeight = 0; + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal). + getSuccessorIndex()]; + FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal). + getSuccessorIndex()]; + } + } + // Perform the actual simplification. - return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, + TrueWeight, FalseWeight); } // SimplifyIndirectBrOnSelect - Replaces @@ -2349,7 +2501,8 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. - return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB); + return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, + 0, 0); } /// TryToSimplifyUncondBranchWithICmpInIt - This is called when we find an icmp @@ -2369,9 +2522,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. -static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, - const TargetData *TD, - IRBuilder<> &Builder) { +static bool TryToSimplifyUncondBranchWithICmpInIt( + ICmpInst *ICI, IRBuilder<> &Builder, const TargetTransformInfo &TTI, + const DataLayout *TD) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -2404,7 +2557,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -2420,7 +2573,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -2448,6 +2601,21 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Split weight for default case to case for "Cst". + Weights[0] = (Weights[0]+1) >> 1; + Weights.push_back(Weights[0]); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(MDWeights)); + } + } SI->addCase(Cst, NewBB); // NewBB branches to the phi block, add the uncond branch and the phi entry. @@ -2461,7 +2629,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, /// SimplifyBranchOnICmpChain - The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. -static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, +static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; @@ -2542,7 +2710,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { - assert(TD && "Cannot switch on pointer without TargetData"); + assert(TD && "Cannot switch on pointer without DataLayout"); CompVal = Builder.CreatePtrToInt(CompVal, TD->getIntPtrType(CompVal->getContext()), "magicptr"); @@ -2861,9 +3029,28 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off"); Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); - Builder.CreateCondBr( + BranchInst *NewBI = Builder.CreateCondBr( Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest()); + // Update weight for the newly-created conditional branch. + SmallVector<uint64_t, 8> Weights; + bool HasWeights = HasBranchWeights(SI); + if (HasWeights) { + GetBranchWeights(SI, Weights); + if (Weights.size() == 1 + SI->getNumCases()) { + // Combine all weights for the cases to be the true weight of NewBI. + // We assume that the sum of all weights for a Terminator can fit into 32 + // bits. + uint32_t NewTrueWeight = 0; + for (unsigned I = 1, E = Weights.size(); I != E; ++I) + NewTrueWeight += (uint32_t)Weights[I]; + NewBI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getContext()). + createBranchWeights(NewTrueWeight, + (uint32_t)Weights[0])); + } + } + // Prune obsolete incoming values off the successor's PHI nodes. for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin(); isa<PHINode>(BBI); ++BBI) { @@ -2894,15 +3081,33 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { } } + SmallVector<uint64_t, 8> Weights; + bool HasWeight = HasBranchWeights(SI); + if (HasWeight) { + GetBranchWeights(SI, Weights); + HasWeight = (Weights.size() == 1 + SI->getNumCases()); + } + // Remove dead cases from the switch. for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) { SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]); assert(Case != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); + if (HasWeight) { + std::swap(Weights[Case.getCaseIndex()+1], Weights.back()); + Weights.pop_back(); + } + // Prune unused values from PHI nodes. Case.getCaseSuccessor()->removePredecessor(SI->getParent()); SI->removeCase(Case); } + if (HasWeight) { + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + SI->setMetadata(LLVMContext::MD_prof, + MDBuilder(SI->getParent()->getContext()). + createBranchWeights(MDWeights)); + } return !DeadCases.empty(); } @@ -2991,26 +3196,95 @@ static bool ValidLookupTableConstant(Constant *C) { isa<UndefValue>(C); } -/// GetCaseResulsts - Try to determine the resulting constant values in phi -/// nodes at the common destination basic block for one of the case -/// destinations of a switch instruction. +/// LookupConstant - If V is a Constant, return it. Otherwise, try to look up +/// its constant value in ConstantPool, returning 0 if it's not there. +static Constant *LookupConstant(Value *V, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (Constant *C = dyn_cast<Constant>(V)) + return C; + return ConstantPool.lookup(V); +} + +/// ConstantFold - Try to fold instruction I into a constant. This works for +/// simple instructions such as binary operations where both operands are +/// constant or can be replaced by constants from the ConstantPool. Returns the +/// resulting constant on success, 0 otherwise. +static Constant *ConstantFold(Instruction *I, + const SmallDenseMap<Value*, Constant*>& ConstantPool) { + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + Constant *A = LookupConstant(BO->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(BO->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::get(BO->getOpcode(), A, B); + } + + if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + Constant *B = LookupConstant(I->getOperand(1), ConstantPool); + if (!B) + return 0; + return ConstantExpr::getCompare(Cmp->getPredicate(), A, B); + } + + if (SelectInst *Select = dyn_cast<SelectInst>(I)) { + Constant *A = LookupConstant(Select->getCondition(), ConstantPool); + if (!A) + return 0; + if (A->isAllOnesValue()) + return LookupConstant(Select->getTrueValue(), ConstantPool); + if (A->isNullValue()) + return LookupConstant(Select->getFalseValue(), ConstantPool); + return 0; + } + + if (CastInst *Cast = dyn_cast<CastInst>(I)) { + Constant *A = LookupConstant(I->getOperand(0), ConstantPool); + if (!A) + return 0; + return ConstantExpr::getCast(Cast->getOpcode(), A, Cast->getDestTy()); + } + + return 0; +} + +/// GetCaseResults - Try to determine the resulting constant values in phi nodes +/// at the common destination basic block, *CommonDest, for one of the case +/// destionations CaseDest corresponding to value CaseVal (0 for the default +/// case), of a switch instruction SI. static bool GetCaseResults(SwitchInst *SI, + ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); - // If CaseDest is empty, continue to its successor. - if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && - !isa<PHINode>(CaseDest->begin())) { - - TerminatorInst *Terminator = CaseDest->getTerminator(); - if (Terminator->getNumSuccessors() != 1) - return false; - - Pred = CaseDest; - CaseDest = Terminator->getSuccessor(0); + // If CaseDest is empty except for some side-effect free instructions through + // which we can constant-propagate the CaseVal, continue to its successor. + SmallDenseMap<Value*, Constant*> ConstantPool; + ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); + for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; + ++I) { + if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { + // If the terminator is a simple branch, continue to the next block. + if (T->getNumSuccessors() != 1) + return false; + Pred = CaseDest; + CaseDest = T->getSuccessor(0); + } else if (isa<DbgInfoIntrinsic>(I)) { + // Skip debug intrinsic. + continue; + } else if (Constant *C = ConstantFold(I, ConstantPool)) { + // Instruction is side-effect free and constant. + ConstantPool.insert(std::make_pair(I, C)); + } else { + break; + } } // If we did not have a CommonDest before, use the current one. @@ -3027,10 +3301,17 @@ static bool GetCaseResults(SwitchInst *SI, if (Idx == -1) continue; - Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx)); + Constant *ConstVal = LookupConstant(PHI->getIncomingValue(Idx), + ConstantPool); if (!ConstVal) return false; + // Note: If the constant comes from constant-propagating the case value + // through the CaseDest basic block, it will be safe to remove the + // instructions in that block. They cannot be used (except in the phi nodes + // we visit) outside CaseDest, because that block does not dominate its + // successor. If it did, we would not be in this phi node. + // Be conservative about which kinds of constants we support. if (!ValidLookupTableConstant(ConstVal)) return false; @@ -3041,83 +3322,255 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. -static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. + class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const DataLayout *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = 0; } // Fill in any holes in the table with the default result. - if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = 0; } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If each element in the table contains the same value, we only need to store + // that single value. + if (SingleValue) { + Kind = SingleValueKind; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + // Insert values into the bitmap. Undef values are set to zero. + if (!isa<UndefValue>(TableContents[I - 1])) { + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; + } + + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetTransformInfo &TTI, + const DataLayout *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + + bool AllTablesFitInRegister = true; + bool HasIllegalType = false; + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + Type *Ty = I->second; + + // Saturate this flag to true. + HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty); + + // Saturate this flag to false. + AllTablesFitInRegister = AllTablesFitInRegister && + SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty); + + // If both flags saturate, we're done. NOTE: This *only* works with + // saturating flags, and all flags have to saturate first due to the + // non-deterministic behavior of iterating over a dense map. + if (HasIllegalType && !AllTablesFitInRegister) + break; + } + + // If each table would fit in a register, we should build it anyway. + if (AllTablesFitInRegister) + return true; + + // Don't build a table that doesn't fit in-register if it has illegal types. + if (HasIllegalType) + return false; + + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + return SI->getNumCases() * 10 >= TableSize * 4; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetTransformInfo &TTI, + const DataLayout* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); - // FIXME: Handle unreachable cases. + + // Only build lookup table when we have a target that supports it. + if (!TTI.shouldBuildLookupTables()) + return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3131,7 +3584,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = NULL; + BasicBlock *CommonDest = 0; typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; SmallDenseMap<PHINode*, ResultListTy> ResultLists; SmallDenseMap<PHINode*, Constant*> DefaultResults; @@ -3148,7 +3601,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Resulting value at phi nodes for this case value. typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; ResultsTy Results; - if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest, + Results)) return false; // Append the result from this case to the list for each phi. @@ -3161,7 +3615,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Get the resulting values for the default case. SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; - if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + if (!GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, + DefaultResultsList)) return false; for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { PHINode *PHI = DefaultResultsList[I].first; @@ -3171,33 +3626,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TTI, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3215,31 +3649,24 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. - if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); - - // If the result is only going to be used to return from the function, - // we want to do that right here. - if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) { - if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { - Builder.CreateRet(Result); - ReturnedEarly = true; - } + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); + + Value *Result = Table.BuildLookup(TableIndex, Builder); + + // If the result is used to return immediately from the function, we want to + // do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin()) && + *PHI->use_begin() == CommonDest->getFirstNonPHIOrDbg()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + break; } - if (!ReturnedEarly) - PHI->addIncoming(Result, LookupBB); + PHI->addIncoming(Result, LookupBB); } if (!ReturnedEarly) @@ -3258,46 +3685,44 @@ static bool SwitchToLookupTable(SwitchInst *SI, } bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { - // If this switch is too complex to want to look at, ignore it. - if (!isValueEqualityComparison(SI)) - return false; - BasicBlock *BB = SI->getParent(); - // If we only have one predecessor, and if it is a branch on this value, - // see if that predecessor totally determines the outcome of this switch. - if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) - if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; + if (isValueEqualityComparison(SI)) { + // If we only have one predecessor, and if it is a branch on this value, + // see if that predecessor totally determines the outcome of this switch. + if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) + if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; - Value *Cond = SI->getCondition(); - if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) - if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB) | true; + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast<SelectInst>(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB, TTI, TD) | true; - // If the block only contains the switch, see if we can fold the block - // away into any preds. - BasicBlock::iterator BBI = BB->begin(); - // Ignore dbg intrinsics. - while (isa<DbgInfoIntrinsic>(BBI)) - ++BBI; - if (SI == &*BBI) - if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block + // away into any preds. + BasicBlock::iterator BBI = BB->begin(); + // Ignore dbg intrinsics. + while (isa<DbgInfoIntrinsic>(BBI)) + ++BBI; + if (SI == &*BBI) + if (FoldValueComparisonIntoPredecessors(SI, Builder)) + return SimplifyCFG(BB, TTI, TD) | true; + } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; // Remove unreachable cases. if (EliminateDeadSwitchCases(SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; - if (SwitchToLookupTable(SI, Builder)) - return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder, TTI, TD)) + return SimplifyCFG(BB, TTI, TD) | true; return false; } @@ -3334,7 +3759,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } return Changed; } @@ -3342,6 +3767,9 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); + if (SinkCommon && SinkThenElseCodeToEnd(BI)) + return true; + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && @@ -3355,7 +3783,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ for (++I; isa<DbgInfoIntrinsic>(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) + TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, TTI, TD)) return true; } @@ -3364,7 +3792,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; return false; } @@ -3379,7 +3807,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -3389,14 +3817,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } else if (&*I == cast<Instruction>(BI->getCondition())){ ++I; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } } @@ -3408,7 +3836,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -3417,7 +3845,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { if (HoistThenElseCodeToIf(BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to successor #1. @@ -3425,7 +3853,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -3434,7 +3862,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; } // If this is a branch on a phi node in the current block, thread control @@ -3442,14 +3870,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI)) - return SimplifyCFG(BB) | true; + return SimplifyCFG(BB, TTI, TD) | true; return false; } @@ -3460,11 +3888,12 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { if (!C) return false; - if (!I->hasOneUse()) // Only look at single-use instructions, for compile time + if (I->use_empty()) return false; if (C->isNullValue()) { - Instruction *Use = I->use_back(); + // Only look at the first use, avoid hurting compile time with long uselists + User *Use = *I->use_begin(); // Now make sure that there are no instructions in between that can alter // control flow (eg. calls) @@ -3589,6 +4018,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { /// eliminates unreachable basic blocks, and does other "peephole" optimization /// of the CFG. It returns true if a modification was made. /// -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) { - return SimplifyCFGOpt(TD).run(BB); +bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const DataLayout *TD) { + return SimplifyCFGOpt(TTI, TD).run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 5d673f1..41c207c 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -15,18 +15,18 @@ #define DEBUG_TYPE "indvars" -#include "llvm/Instructions.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; @@ -44,7 +44,7 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; - const TargetData *TD; // May be NULL + const DataLayout *TD; // May be NULL SmallVectorImpl<WeakVH> &DeadInsts; @@ -56,7 +56,7 @@ namespace { L(Loop), LI(LPM->getAnalysisIfAvailable<LoopInfo>()), SE(SE), - TD(LPM->getAnalysisIfAvailable<TargetData>()), + TD(LPM->getAnalysisIfAvailable<DataLayout>()), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 528e6a1..f9687e4 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -15,17 +15,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Target/TargetData.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -46,7 +46,7 @@ namespace { /// runOnFunction - Remove instructions that simplify. bool runOnFunction(Function &F) { const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); - const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>(); SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp new file mode 100644 index 0000000..83c74e7 --- /dev/null +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -0,0 +1,1894 @@ +//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility pass used for testing the InstructionSimplify analysis. +// The analysis is applied to every instruction, and if it simplifies then the +// instruction is replaced by the simplification. If you are looking for a pass +// that performs serious instruction folding, use the instcombine pass instead. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" + +using namespace llvm; + +/// This class is the abstract base class for the set of optimizations that +/// corresponds to one library call. +namespace { +class LibCallOptimization { +protected: + Function *Caller; + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + LLVMContext* Context; +public: + LibCallOptimization() { } + virtual ~LibCallOptimization() {} + + /// callOptimizer - This pure virtual method is implemented by base classes to + /// do various optimizations. If this returns null then no transformation was + /// performed. If it returns CI, then it transformed the call and CI is to be + /// deleted. If it returns something else, replace CI with the new value and + /// delete CI. + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) + =0; + + Value *optimizeCall(CallInst *CI, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, IRBuilder<> &B) { + Caller = CI->getParent()->getParent(); + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + if (CI->getCalledFunction()) + Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + + return callOptimizer(CI->getCalledFunction(), CI, B); + } +}; + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool isOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +/// isOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static bool callHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} + +//===----------------------------------------------------------------------===// +// Fortified Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FortifiedLibCallOptimization : public LibCallOptimization { +protected: + virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, + bool isString) const = 0; +}; + +struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization { + CallInst *CI; + + bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const { + if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp)) + return true; + if (ConstantInt *SizeCI = + dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) { + if (SizeCI->isAllOnesValue()) + return true; + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. + if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } + if (ConstantInt *Arg = dyn_cast<ConstantInt>( + CI->getArgOperand(SizeArgOp))) + return SizeCI->getZExtValue() >= Arg->getZExtValue(); + } + return false; + } +}; + +struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct MemSetChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(Context) || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), + false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } + return 0; + } +}; + +struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(Context)) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // __strcpy_chk(x,x) -> x + return Src; + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __strcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Value *Ret = + EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(Context), Len), + CI->getArgOperand(2), B, TD, TLI); + return Ret; + } + return 0; + } +}; + +struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 3 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + FT->getParamType(2) != TD->getIntPtrType(FT->getParamType(0))) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain stpcpy. Otherwise we'll keep our + // stpcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (isFoldable(2, 1, true)) { + Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6)); + return Ret; + } else { + // Maybe we can stil fold __stpcpy_chk to __memcpy_chk. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // This optimization require DataLayout. + if (!TD) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD, TLI)) + return 0; + return DstEnd; + } + return 0; + } +}; + +struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + this->CI = CI; + StringRef Name = Callee->getName(); + FunctionType *FT = Callee->getFunctionType(); + LLVMContext &Context = CI->getParent()->getContext(); + + // Check if this has the right signature. + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != Type::getInt8PtrTy(Context) || + !FT->getParamType(2)->isIntegerTy() || + FT->getParamType(3) != TD->getIntPtrType(Context)) + return 0; + + if (isFoldable(3, 2, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + return Ret; + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// +// String and Memory Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct StrCatOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + --Len; // Unbias length. + + // Handle the simple, do-nothing case: strcat(x, "") -> x + if (Len == 0) + return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + return emitStrLenMemCpy(Src, Dst, Len, B); + } + + Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, + IRBuilder<> &B) { + // We need to find the end of the destination string. That's where the + // memory is to be moved to. We just generate a call to strlen. + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; + + // Now that we have the destination's length, we must index into the + // destination's pointer to get the actual memcpy destination (end of + // the string .. we're concatenating). + Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); + + // We have enough information to now generate the memcpy call to do the + // concatenation for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(CpyDst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; + } +}; + +struct StrNCatOpt : public StrCatOpt { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncat" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + FT->getParamType(1) != FT->getReturnType() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + // Extract some information from the instruction + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + uint64_t Len; + + // We don't do anything if length is not constant + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Len = LengthArg->getZExtValue(); + else + return 0; + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; // Unbias length. + + // Handle the simple, do-nothing cases: + // strncat(x, "", c) -> x + // strncat(x, c, 0) -> x + if (SrcLen == 0 || Len == 0) return Dst; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // We don't optimize this case + if (Len < SrcLen) return 0; + + // strncat(x, s, c) -> strcat(x, s) + // s is constant so the strcat can be optimized further + return emitStrLenMemCpy(Src, Dst, SrcLen, B); + } +}; + +struct StrChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + + // If the second operand is non-constant, see if we can compute the length + // of the input string and turn this into memchr. + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + if (CharC == 0) { + // These optimizations require DataLayout. + if (!TD) return 0; + + uint64_t Len = GetStringLength(SrcStr); + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. + return 0; + + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD, TLI); + } + + // Otherwise, the character is a constant, see if the first argument is + // a string literal. If so, we can constant fold. + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) + return 0; + + // Compute the offset, make sure to handle the case when we're searching for + // zero (a weird way to spell strlen). + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.find(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. strchr returns null. + return Constant::getNullValue(CI->getType()); + + // strchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strchr"); + } +}; + +struct StrRChrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strrchr" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != B.getInt8PtrTy() || + FT->getParamType(0) != FT->getReturnType() || + !FT->getParamType(1)->isIntegerTy(32)) + return 0; + + Value *SrcStr = CI->getArgOperand(0); + ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + + // Cannot fold anything if we're not looking for a constant. + if (!CharC) + return 0; + + StringRef Str; + if (!getConstantStringInfo(SrcStr, Str)) { + // strrchr(s, 0) -> strchr(s, 0) + if (TD && CharC->isZero()) + return EmitStrChr(SrcStr, '\0', B, TD, TLI); + return 0; + } + + // Compute the offset. + size_t I = CharC->getSExtValue() == 0 ? + Str.size() : Str.rfind(CharC->getSExtValue()); + if (I == StringRef::npos) // Didn't find the char. Return null. + return Constant::getNullValue(CI->getType()); + + // strrchr(s+n,c) -> gep(s+n+i,c) + return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr"); + } +}; + +struct StrCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strcmp(x,x) -> 0 + return ConstantInt::get(CI->getType(), 0); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strcmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) + return ConstantInt::get(CI->getType(), Str1.compare(Str2)); + + if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + // strcmp(P, "x") -> memcmp(P, "x", 2) + uint64_t Len1 = GetStringLength(Str1P); + uint64_t Len2 = GetStringLength(Str2P); + if (Len1 && Len2) { + // These optimizations require DataLayout. + if (!TD) return 0; + + return EmitMemCmp(Str1P, Str2P, + ConstantInt::get(TD->getIntPtrType(*Context), + std::min(Len1, Len2)), B, TD, TLI); + } + + return 0; + } +}; + +struct StrNCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strncmp" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || + !FT->getReturnType()->isIntegerTy(32) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); + if (Str1P == Str2P) // strncmp(x,x,n) -> 0 + return ConstantInt::get(CI->getType(), 0); + + // Get the length argument if it is constant. + uint64_t Length; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2))) + Length = LengthArg->getZExtValue(); + else + return 0; + + if (Length == 0) // strncmp(x,y,0) -> 0 + return ConstantInt::get(CI->getType(), 0); + + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); + + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + // strncmp(x, y) -> cnst (if both x and y are constant strings) + if (HasStr1 && HasStr2) { + StringRef SubStr1 = Str1.substr(0, Length); + StringRef SubStr2 = Str2.substr(0, Length); + return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2)); + } + + if (HasStr1 && Str1.empty()) // strncmp("", x, n) -> -*x + return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), + CI->getType())); + + if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x + return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); + + return 0; + } +}; + +struct StrCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "strcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) // strcpy(x,x) -> x + return Src; + + // These optimizations require DataLayout. + if (!TD) return 0; + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); + return Dst; + } +}; + +struct StpCpyOpt: public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Verify the "stpcpy" function prototype. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy()) + return 0; + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } + + // See if we can get the length of the input string. + uint64_t Len = GetStringLength(Src); + if (Len == 0) return 0; + + Type *PT = FT->getParamType(0); + Value *LenV = ConstantInt::get(TD->getIntPtrType(PT), Len); + Value *DstEnd = B.CreateGEP(Dst, + ConstantInt::get(TD->getIntPtrType(PT), + Len - 1)); + + // We have enough information to now generate the memcpy call to do the + // copy for us. Make a memcpy to copy the nul byte with align = 1. + B.CreateMemCpy(Dst, Src, LenV, 1); + return DstEnd; + } +}; + +struct StrNCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getParamType(2)->isIntegerTy()) + return 0; + + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); + + // See if we can get the length of the input string. + uint64_t SrcLen = GetStringLength(Src); + if (SrcLen == 0) return 0; + --SrcLen; + + if (SrcLen == 0) { + // strncpy(x, "", y) -> memset(x, '\0', y, 1) + B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1); + return Dst; + } + + uint64_t Len; + if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp)) + Len = LengthArg->getZExtValue(); + else + return 0; + + if (Len == 0) return Dst; // strncpy(x, y, 0) -> x + + // These optimizations require DataLayout. + if (!TD) return 0; + + // Let strncpy handle the zero padding + if (Len > SrcLen+1) return 0; + + Type *PT = FT->getParamType(0); + // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] + B.CreateMemCpy(Dst, Src, + ConstantInt::get(TD->getIntPtrType(PT), Len), 1); + + return Dst; + } +}; + +struct StrLenOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || + FT->getParamType(0) != B.getInt8PtrTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + Value *Src = CI->getArgOperand(0); + + // Constant folding: strlen("xyz") -> 3 + if (uint64_t Len = GetStringLength(Src)) + return ConstantInt::get(CI->getType(), Len-1); + + // strlen(x) != 0 --> *x != 0 + // strlen(x) == 0 --> *x == 0 + if (isOnlyUsedInZeroEqualityComparison(CI)) + return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); + return 0; + } +}; + +struct StrPBrkOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + FT->getReturnType() != FT->getParamType(0)) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strpbrk(s, "") -> NULL + // strpbrk("", s) -> NULL + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t I = S1.find_first_of(S2); + if (I == std::string::npos) // No match. + return Constant::getNullValue(CI->getType()); + + return B.CreateGEP(CI->getArgOperand(0), B.getInt64(I), "strpbrk"); + } + + // strpbrk(s, "a") -> strchr(s, 'a') + if (TD && HasS2 && S2.size() == 1) + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); + + return 0; + } +}; + +struct StrToOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) + return 0; + + Value *EndPtr = CI->getArgOperand(1); + if (isa<ConstantPointerNull>(EndPtr)) { + // With a null EndPtr, this function won't capture the main argument. + // It would be readonly too, except that it still may write to errno. + CI->addAttribute(1, Attribute::get(Callee->getContext(), + Attribute::NoCapture)); + } + + return 0; + } +}; + +struct StrSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strspn(s, "") -> 0 + // strspn("", s) -> 0 + if ((HasS1 && S1.empty()) || (HasS2 && S2.empty())) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_not_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + return 0; + } +}; + +struct StrCSpnOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + FT->getParamType(0) != B.getInt8PtrTy() || + FT->getParamType(1) != FT->getParamType(0) || + !FT->getReturnType()->isIntegerTy()) + return 0; + + StringRef S1, S2; + bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); + bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2); + + // strcspn("", s) -> 0 + if (HasS1 && S1.empty()) + return Constant::getNullValue(CI->getType()); + + // Constant folding. + if (HasS1 && HasS2) { + size_t Pos = S1.find_first_of(S2); + if (Pos == StringRef::npos) Pos = S1.size(); + return ConstantInt::get(CI->getType(), Pos); + } + + // strcspn(s, "") -> strlen(s) + if (TD && HasS2 && S2.empty()) + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); + + return 0; + } +}; + +struct StrStrOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) + return 0; + + // fold strstr(x, x) -> x. + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast<ICmpInst>(*UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + LCS->replaceAllUsesWith(Old, Cmp); + } + return CI; + } + + // See if either input string is a constant string. + StringRef SearchStr, ToFindStr; + bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr); + + // fold strstr(x, "") -> x. + if (HasStr2 && ToFindStr.empty()) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // If both strings are known, constant fold it. + if (HasStr1 && HasStr2) { + std::string::size_type Offset = SearchStr.find(ToFindStr); + + if (Offset == StringRef::npos) // strstr("foo", "bar") -> null + return Constant::getNullValue(CI->getType()); + + // strstr("abcd", "bc") -> gep((char*)"abcd", 1) + Value *Result = CastToCStr(CI->getArgOperand(0), B); + Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); + return B.CreateBitCast(Result, CI->getType()); + } + + // fold strstr(x, "y") -> strchr(x, 'y'). + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } + return 0; + } +}; + +struct MemCmpOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) + return 0; + + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); + + if (LHS == RHS) // memcmp(s,s,x) -> 0 + return Constant::getNullValue(CI->getType()); + + // Make sure we have a constant length. + ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!LenC) return 0; + uint64_t Len = LenC->getZExtValue(); + + if (Len == 0) // memcmp(s1,s2,0) -> 0 + return Constant::getNullValue(CI->getType()); + + // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS + if (Len == 1) { + Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"), + CI->getType(), "lhsv"); + Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"), + CI->getType(), "rhsv"); + return B.CreateSub(LHSV, RHSV, "chardiff"); + } + + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) + StringRef LHSStr, RHSStr; + if (getConstantStringInfo(LHS, LHSStr) && + getConstantStringInfo(RHS, RHSStr)) { + // Make sure we're not reading out-of-bounds memory. + if (Len > LHSStr.size() || Len > RHSStr.size()) + return 0; + // Fold the memcmp and normalize the result. This way we get consistent + // results across multiple platforms. + uint64_t Ret = 0; + int Cmp = memcmp(LHSStr.data(), RHSStr.data(), Len); + if (Cmp < 0) + Ret = -1; + else if (Cmp > 0) + Ret = 1; + return ConstantInt::get(CI->getType(), Ret); + } + + return 0; + } +}; + +struct MemCpyOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemMoveOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) + B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +struct MemSetOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + // memset(p, v, n) -> llvm.memset(p, v, n, 1) + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); + B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); + return CI->getArgOperand(0); + } +}; + +//===----------------------------------------------------------------------===// +// Math Library Optimizations +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; + +struct UnsafeFPLibCallOptimization : public LibCallOptimization { + bool UnsafeFPShrink; + UnsafeFPLibCallOptimization(bool UnsafeFPShrink) { + this->UnsafeFPShrink = UnsafeFPShrink; + } +}; + +struct CosOpt : public UnsafeFPLibCallOptimization { + CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + // cos(-x) -> cos(x) + Value *Op1 = CI->getArgOperand(0); + if (BinaryOperator::isFNeg(Op1)) { + BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); + return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); + } + return Ret; + } +}; + +struct PowOpt : public UnsafeFPLibCallOptimization { + PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); + if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { + if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 + return Op1C; + if (Op1C->isExactlyValue(2.0)) // pow(2.0, x) -> exp2(x) + return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + } + + ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); + if (Op2C == 0) return Ret; + + if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 + return ConstantFP::get(CI->getType(), 1.0); + + if (Op2C->isExactlyValue(0.5)) { + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). + // This is faster than calling pow, and still handles negative zero + // and negative infinity correctly. + // TODO: In fast-math mode, this could be just sqrt(x). + // TODO: In finite-only mode, this could be just fabs(sqrt(x)). + Value *Inf = ConstantFP::getInfinity(CI->getType()); + Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); + Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, + Callee->getAttributes()); + Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, + Callee->getAttributes()); + Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf); + Value *Sel = B.CreateSelect(FCmp, Inf, FAbs); + return Sel; + } + + if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x + return Op1; + if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x + return B.CreateFMul(Op1, Op1, "pow2"); + if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x + return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), + Op1, "powrecip"); + return 0; + } +}; + +struct Exp2Opt : public UnsafeFPLibCallOptimization { + Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.callOptimizer(Callee, CI, B); + } + + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + Value *Op = CI->getArgOperand(0); + // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 + // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 + Value *LdExpArg = 0; + if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) + LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); + } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) { + if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) + LdExpArg = B.CreateZExt(OpC->getOperand(0), B.getInt32Ty()); + } + + if (LdExpArg) { + const char *Name; + if (Op->getType()->isFloatTy()) + Name = "ldexpf"; + else if (Op->getType()->isDoubleTy()) + Name = "ldexp"; + else + Name = "ldexpl"; + + Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); + if (!Op->getType()->isFloatTy()) + One = ConstantExpr::getFPExtend(One, Op->getType()); + + Module *M = Caller->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall2(Callee, One, LdExpArg); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; + } + return Ret; + } +}; + +//===----------------------------------------------------------------------===// +// Integer Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct FFSOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // Just make sure this has 2 arguments of the same FP type, which match the + // result type. + if (FT->getNumParams() != 1 || + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) + return 0; + + Value *Op = CI->getArgOperand(0); + + // Constant fold. + if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (CI->isZero()) // ffs(0) -> 0. + return B.getInt32(0); + // ffs(c) -> cttz(c)+1 + return B.getInt32(CI->getValue().countTrailingZeros() + 1); + } + + // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0 + Type *ArgType = Op->getType(); + Value *F = Intrinsic::getDeclaration(Callee->getParent(), + Intrinsic::cttz, ArgType); + Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz"); + V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); + V = B.CreateIntCast(V, B.getInt32Ty(), false); + + Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType)); + return B.CreateSelect(Cond, V, B.getInt32(0)); + } +}; + +struct AbsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(integer) where the types agree. + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + FT->getParamType(0) != FT->getReturnType()) + return 0; + + // abs(x) -> x >s -1 ? x : -x + Value *Op = CI->getArgOperand(0); + Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), + "ispos"); + Value *Neg = B.CreateNeg(Op, "neg"); + return B.CreateSelect(Pos, Op, Neg); + } +}; + +struct IsDigitOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isdigit(c) -> (c-'0') <u 10 + Value *Op = CI->getArgOperand(0); + Op = B.CreateSub(Op, B.getInt32('0'), "isdigittmp"); + Op = B.CreateICmpULT(Op, B.getInt32(10), "isdigit"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct IsAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require integer(i32) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // isascii(c) -> c <u 128 + Value *Op = CI->getArgOperand(0); + Op = B.CreateICmpULT(Op, B.getInt32(128), "isascii"); + return B.CreateZExt(Op, CI->getType()); + } +}; + +struct ToAsciiOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + // We require i32(i32) + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isIntegerTy(32)) + return 0; + + // toascii(c) -> c & 0x7f + return B.CreateAnd(CI->getArgOperand(0), + ConstantInt::get(CI->getType(),0x7F)); + } +}; + +//===----------------------------------------------------------------------===// +// Formatting and IO Library Call Optimizations +//===----------------------------------------------------------------------===// + +struct PrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) + return 0; + + // Empty format string -> noop. + if (FormatStr.empty()) // Tolerate printf's declared void. + return CI->use_empty() ? (Value*)CI : + ConstantInt::get(CI->getType(), 0); + + // Do not do any of the following transformations if the printf return value + // is used, in general the printf return value is not compatible with either + // putchar() or puts(). + if (!CI->use_empty()) + return 0; + + // printf("x") -> putchar('x'), even for '%'. + if (FormatStr.size() == 1) { + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("foo\n") --> puts("foo") + if (FormatStr[FormatStr.size()-1] == '\n' && + FormatStr.find('%') == std::string::npos) { // no format characters. + // Create a string literal with no \n on it. We expect the constant merge + // pass to be run after this pass, to merge duplicate strings. + FormatStr = FormatStr.drop_back(); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); + } + + // Optimize specific format strings. + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); + + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + // printf("%s\n", str) --> puts(str) + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy()) { + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. + if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct SPrintFOpt : public LibCallOptimization { + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // Check for a fixed format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // If we just have a format string (nothing else crazy) transform it. + if (CI->getNumArgOperands() == 2) { + // Make sure there's no % in the constant array. We could try to handle + // %% -> % in the future if we cared. + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') + return 0; // we found a format specifier, bail out. + + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), // Copy the + FormatStr.size() + 1), 1); // nul byte. + return ConstantInt::get(CI->getType(), FormatStr.size()); + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); + B.CreateStore(V, Ptr); + Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul"); + B.CreateStore(B.getInt8(0), Ptr); + + return ConstantInt::get(CI->getType(), 1); + } + + if (FormatStr[1] == 's') { + // These optimizations require DataLayout. + if (!TD) return 0; + + // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; + Value *IncLen = B.CreateAdd(Len, + ConstantInt::get(Len->getType(), 1), + "leninc"); + B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + + // The sprintf result is the unincremented number of bytes in the string. + return B.CreateIntCast(Len, CI->getType(), false); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. + if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FPrintFOpt : public LibCallOptimization { + Value *optimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { + // All the optimizations depend on the format string. + StringRef FormatStr; + if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) + return 0; + + // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) + if (CI->getNumArgOperands() == 2) { + for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) + if (FormatStr[i] == '%') // Could handle %% -> % if we cared. + return 0; // We found a format specifier. + + // These optimizations require DataLayout. + if (!TD) return 0; + + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; + } + + // The remaining optimizations require the format string to be "%s" or "%c" + // and have an extra operand. + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) + return 0; + + // Decode the second character of the format string. + if (FormatStr[1] == 'c') { + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + if (FormatStr[1] == 's') { + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) + return 0; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); + } + return 0; + } + + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. + if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast<CallInst>(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } +}; + +struct FWriteOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require a pointer, an integer, an integer, a pointer, returning integer. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + // Get the element size and count. + ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); + if (!SizeC || !CountC) return 0; + uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); + + // If this is writing zero records, remove the call (it's a noop). + if (Bytes == 0) + return ConstantInt::get(CI->getType(), 0); + + // If this is writing one byte, turn it into fputc. + // This optimisation is only valid, if the return value is unused. + if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + } + + return 0; + } +}; + +struct FPutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require DataLayout. + if (!TD) return 0; + + // Require two pointers. Also, we can't optimize if return value is used. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !CI->use_empty()) + return 0; + + // fputs(s,F) --> fwrite(s,1,strlen(s),F) + uint64_t Len = GetStringLength(CI->getArgOperand(0)); + if (!Len) return 0; + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); + } +}; + +struct PutsOpt : public LibCallOptimization { + virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + // Check for a constant string. + StringRef Str; + if (!getConstantStringInfo(CI->getArgOperand(0), Str)) + return 0; + + if (Str.empty() && CI->use_empty()) { + // puts("") -> putchar('\n') + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; + return B.CreateIntCast(Res, CI->getType(), true); + } + + return 0; + } +}; + +} // End anonymous namespace. + +namespace llvm { + +class LibCallSimplifierImpl { + const DataLayout *TD; + const TargetLibraryInfo *TLI; + const LibCallSimplifier *LCS; + bool UnsafeFPShrink; + StringMap<LibCallOptimization*> Optimizations; + + // Fortified library call optimizations. + MemCpyChkOpt MemCpyChk; + MemMoveChkOpt MemMoveChk; + MemSetChkOpt MemSetChk; + StrCpyChkOpt StrCpyChk; + StpCpyChkOpt StpCpyChk; + StrNCpyChkOpt StrNCpyChk; + + // String library call optimizations. + StrCatOpt StrCat; + StrNCatOpt StrNCat; + StrChrOpt StrChr; + StrRChrOpt StrRChr; + StrCmpOpt StrCmp; + StrNCmpOpt StrNCmp; + StrCpyOpt StrCpy; + StpCpyOpt StpCpy; + StrNCpyOpt StrNCpy; + StrLenOpt StrLen; + StrPBrkOpt StrPBrk; + StrToOpt StrTo; + StrSpnOpt StrSpn; + StrCSpnOpt StrCSpn; + StrStrOpt StrStr; + + // Memory library call optimizations. + MemCmpOpt MemCmp; + MemCpyOpt MemCpy; + MemMoveOpt MemMove; + MemSetOpt MemSet; + + // Math library call optimizations. + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + + // Integer library call optimizations. + FFSOpt FFS; + AbsOpt Abs; + IsDigitOpt IsDigit; + IsAsciiOpt IsAscii; + ToAsciiOpt ToAscii; + + // Formatting and IO library call optimizations. + PrintFOpt PrintF; + SPrintFOpt SPrintF; + FPrintFOpt FPrintF; + FWriteOpt FWrite; + FPutsOpt FPuts; + PutsOpt Puts; + + void initOptimizations(); + void addOpt(LibFunc::Func F, LibCallOptimization* Opt); + void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); +public: + LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, + const LibCallSimplifier *LCS, + bool UnsafeFPShrink = false) + : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true), + Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { + this->TD = TD; + this->TLI = TLI; + this->LCS = LCS; + this->UnsafeFPShrink = UnsafeFPShrink; + } + + Value *optimizeCall(CallInst *CI); +}; + +void LibCallSimplifierImpl::initOptimizations() { + // Fortified library call optimizations. + Optimizations["__memcpy_chk"] = &MemCpyChk; + Optimizations["__memmove_chk"] = &MemMoveChk; + Optimizations["__memset_chk"] = &MemSetChk; + Optimizations["__strcpy_chk"] = &StrCpyChk; + Optimizations["__stpcpy_chk"] = &StpCpyChk; + Optimizations["__strncpy_chk"] = &StrNCpyChk; + Optimizations["__stpncpy_chk"] = &StrNCpyChk; + + // String library call optimizations. + addOpt(LibFunc::strcat, &StrCat); + addOpt(LibFunc::strncat, &StrNCat); + addOpt(LibFunc::strchr, &StrChr); + addOpt(LibFunc::strrchr, &StrRChr); + addOpt(LibFunc::strcmp, &StrCmp); + addOpt(LibFunc::strncmp, &StrNCmp); + addOpt(LibFunc::strcpy, &StrCpy); + addOpt(LibFunc::stpcpy, &StpCpy); + addOpt(LibFunc::strncpy, &StrNCpy); + addOpt(LibFunc::strlen, &StrLen); + addOpt(LibFunc::strpbrk, &StrPBrk); + addOpt(LibFunc::strtol, &StrTo); + addOpt(LibFunc::strtod, &StrTo); + addOpt(LibFunc::strtof, &StrTo); + addOpt(LibFunc::strtoul, &StrTo); + addOpt(LibFunc::strtoll, &StrTo); + addOpt(LibFunc::strtold, &StrTo); + addOpt(LibFunc::strtoull, &StrTo); + addOpt(LibFunc::strspn, &StrSpn); + addOpt(LibFunc::strcspn, &StrCSpn); + addOpt(LibFunc::strstr, &StrStr); + + // Memory library call optimizations. + addOpt(LibFunc::memcmp, &MemCmp); + addOpt(LibFunc::memcpy, &MemCpy); + addOpt(LibFunc::memmove, &MemMove); + addOpt(LibFunc::memset, &MemSet); + + // Math library call optimizations. + addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } + + addOpt(LibFunc::cosf, &Cos); + addOpt(LibFunc::cos, &Cos); + addOpt(LibFunc::cosl, &Cos); + addOpt(LibFunc::powf, &Pow); + addOpt(LibFunc::pow, &Pow); + addOpt(LibFunc::powl, &Pow); + Optimizations["llvm.pow.f32"] = &Pow; + Optimizations["llvm.pow.f64"] = &Pow; + Optimizations["llvm.pow.f80"] = &Pow; + Optimizations["llvm.pow.f128"] = &Pow; + Optimizations["llvm.pow.ppcf128"] = &Pow; + addOpt(LibFunc::exp2l, &Exp2); + addOpt(LibFunc::exp2, &Exp2); + addOpt(LibFunc::exp2f, &Exp2); + Optimizations["llvm.exp2.ppcf128"] = &Exp2; + Optimizations["llvm.exp2.f128"] = &Exp2; + Optimizations["llvm.exp2.f80"] = &Exp2; + Optimizations["llvm.exp2.f64"] = &Exp2; + Optimizations["llvm.exp2.f32"] = &Exp2; + + // Integer library call optimizations. + addOpt(LibFunc::ffs, &FFS); + addOpt(LibFunc::ffsl, &FFS); + addOpt(LibFunc::ffsll, &FFS); + addOpt(LibFunc::abs, &Abs); + addOpt(LibFunc::labs, &Abs); + addOpt(LibFunc::llabs, &Abs); + addOpt(LibFunc::isdigit, &IsDigit); + addOpt(LibFunc::isascii, &IsAscii); + addOpt(LibFunc::toascii, &ToAscii); + + // Formatting and IO library call optimizations. + addOpt(LibFunc::printf, &PrintF); + addOpt(LibFunc::sprintf, &SPrintF); + addOpt(LibFunc::fprintf, &FPrintF); + addOpt(LibFunc::fwrite, &FWrite); + addOpt(LibFunc::fputs, &FPuts); + addOpt(LibFunc::puts, &Puts); +} + +Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { + if (Optimizations.empty()) + initOptimizations(); + + Function *Callee = CI->getCalledFunction(); + LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + if (LCO) { + IRBuilder<> Builder(CI); + return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); + } + return 0; +} + +void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { + if (TLI->has(F)) + Optimizations[TLI->getName(F)] = Opt; +} + +void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + +LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool UnsafeFPShrink) { + Impl = new LibCallSimplifierImpl(TD, TLI, this, UnsafeFPShrink); +} + +LibCallSimplifier::~LibCallSimplifier() { + delete Impl; +} + +Value *LibCallSimplifier::optimizeCall(CallInst *CI) { + return Impl->optimizeCall(CI); +} + +void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) const { + I->replaceAllUsesWith(With); + I->eraseFromParent(); +} + +} diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index b1cad06..560f581 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Type.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; char UnifyFunctionExitNodes::ID = 0; diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index 24e8c8f..5812d46 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -29,6 +29,7 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) { initializePromotePassPass(Registry); initializeUnifyFunctionExitNodesPass(Registry); initializeInstSimplifierPass(Registry); + initializeMetaRenamerPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538d..a5e1643 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Metadata.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { |
