Diffstat (limited to 'lib'): 305 files changed, 7066 insertions, 5924 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index cb1e1eb..2e3ec8b 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp + CodeMetrics.cpp ConstantFolding.cpp DIBuilder.cpp DbgInfoPrinter.cpp diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp new file mode 100644 index 0000000..6c93f78 --- /dev/null +++ b/lib/Analysis/CodeMetrics.cpp @@ -0,0 +1,176 @@ +//===- CodeMetrics.cpp - Code cost measurements ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements code cost measurement utilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Function.h" +#include "llvm/Support/CallSite.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, + const TargetData *TD) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa<PHINode>(II)) continue; // PHI nodes don't count. + + // Special handling for calls. + if (isa<CallInst>(II) || isa<InvokeInst>(II)) { + if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) { + switch (IntrinsicI->getIntrinsicID()) { + default: break; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't count as size. 
+ continue; + } + } + + ImmutableCallSite CS(cast<Instruction>(II)); + + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. + if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa<InlineAsm>(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + if (const CastInst *CI = dyn_cast<CastInst>(II)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || + isa<PtrToIntInst>(CI)) + continue; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (isa<TruncInst>(CI) && TD && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + continue; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + continue; + } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (GEPI->hasAllConstantIndices()) + continue; + } + + ++NumInsts; + } + + if (isa<ReturnInst>(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa<IndirectBrInst>(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { + // If this function contains a call that "returns twice" (e.g., setjmp or + // _setjmp) and it isn't marked with "returns twice" itself, never inline it. 
+ // This is a hack because we depend on the user marking their local variables + // as volatile if they are live across a setjmp call, and they probably + // won't do this in callers. + exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && + !F->hasFnAttr(Attribute::ReturnsTwice); + + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB, TD); +} diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 585a087..e30c0a9 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt))) + if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt))) return CI->getZExtValue(); return 0; diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index cad22f8..463584d 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -79,10 +79,39 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, return false; } +/// Return true if all loop headers that dominate this block are in simplified +/// form. +static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, + const LoopInfo *LI, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { + Loop *NearestLoop = 0; + for (DomTreeNode *Rung = DT->getNode(BB); + Rung; Rung = Rung->getIDom()) { + BasicBlock *DomBB = Rung->getBlock(); + Loop *DomLoop = LI->getLoopFor(DomBB); + if (DomLoop && DomLoop->getHeader() == DomBB) { + // If the domtree walk reaches a loop with no preheader, return false. + if (!DomLoop->isLoopSimplifyForm()) + return false; + // If we have already checked this loop nest, stop checking. + if (SimpleLoopNests.count(DomLoop)) + break; + // If we have not already checked this loop nest, remember the loop + // header nearest to BB. The nearest loop may not contain BB. + if (!NearestLoop) + NearestLoop = DomLoop; + } + } + if (NearestLoop) + SimpleLoopNests.insert(NearestLoop); + return true; +} + /// AddUsersIfInteresting - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. -bool IVUsers::AddUsersIfInteresting(Instruction *I) { +bool IVUsers::AddUsersIfInteresting(Instruction *I, + SmallPtrSet<Loop*,16> &SimpleLoopNests) { // Add this IV user to the Processed set before returning false to ensure that // all IV users are members of the set. See IVUsers::isIVUserOrOperand. if (!Processed.insert(I)) @@ -117,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (isa<PHINode>(User) && Processed.count(User)) continue; + // Only consider IVUsers that are dominated by simplified loop + // headers. Otherwise, SCEVExpander will crash. + BasicBlock *UseBB = User->getParent(); + // A phi's use is live out of its predecessor block. + if (PHINode *PHI = dyn_cast<PHINode>(User)) { + unsigned OperandNo = UI.getOperandNo(); + unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); + UseBB = PHI->getIncomingBlock(ValNo); + } + if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests)) + return false; + // Descend recursively, but not into PHI nodes outside the current loop. 
// It's important to see the entire expression outside the loop to get // choices that depend on addressing mode use right, although we won't @@ -126,12 +167,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersIfInteresting(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } - } else if (Processed.count(User) || !AddUsersIfInteresting(User)) { + } else if (Processed.count(User) + || !AddUsersIfInteresting(User, SimpleLoopNests)) { DEBUG(dbgs() << "FOUND USER: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; @@ -180,11 +222,16 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { SE = &getAnalysis<ScalarEvolution>(); TD = getAnalysisIfAvailable<TargetData>(); + // SCEVExpander can only handle users that are dominated by simplified loop + // entries. Keep track of all loops that are only dominated by other simple + // loops so we don't traverse the domtree for each user. + SmallPtrSet<Loop*,16> SimpleLoopNests; + // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I, SimpleLoopNests); return false; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index b326ba7..dedbfeb 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -20,165 +20,27 @@ using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(const Function *F) { - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - -/// analyzeBasicBlock - Fill in the current structure with information gleaned -/// from the specified block. -void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetData *TD) { - ++NumBlocks; - unsigned NumInstsBeforeThisBB = NumInsts; - for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); - II != E; ++II) { - if (isa<PHINode>(II)) continue; // PHI nodes don't count. - - // Special handling for calls. 
- if (isa<CallInst>(II) || isa<InvokeInst>(II)) { - if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) { - switch (IntrinsicI->getIntrinsicID()) { - default: break; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - continue; - } - } - - ImmutableCallSite CS(cast<Instruction>(II)); - - if (const Function *F = CS.getCalledFunction()) { - // If a function is both internal and has a single use, then it is - // extremely likely to get inlined in the future (it was probably - // exposed by an interleaved devirtualization pass). - if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse()) - ++NumInlineCandidates; - - // If this call is to function itself, then the function is recursive. - // Inlining it into other functions is a bad idea, because this is - // basically just a form of loop peeling, and our metrics aren't useful - // for that case. - if (F == BB->getParent()) - isRecursive = true; - } - - if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); - - // We don't want inline asm to count as a call - that would prevent loop - // unrolling. The argument setup cost is still real, though. - if (!isa<InlineAsm>(CS.getCalledValue())) - ++NumCalls; - } - } - - if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { - if (!AI->isStaticAlloca()) - this->usesDynamicAlloca = true; - } - - if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) - ++NumVectorInsts; - - if (const CastInst *CI = dyn_cast<CastInst>(II)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || - isa<PtrToIntInst>(CI)) - continue; - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (isa<TruncInst>(CI) && TD && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - continue; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - continue; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (GEPI->hasAllConstantIndices()) +unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant( + const CodeMetrics &Metrics, Value *V) { + unsigned Reduction = 0; + SmallVector<Value *, 4> Worklist; + Worklist.push_back(V); + do { + Value *V = Worklist.pop_back_val(); + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { + // We will be able to eliminate all but one of the successors. + const TerminatorInst &TI = cast<TerminatorInst>(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I)); + // We don't know which blocks will be eliminated, so use the average size. 
+ Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; continue; - } - - ++NumInsts; - } - - if (isa<ReturnInst>(BB->getTerminator())) - ++NumRets; - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this indirect - // jump would jump from the inlined copy of the function into the original - // function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa<IndirectBrInst>(BB->getTerminator())) - containsIndirectBr = true; - - // Remember NumInsts for this BB. - NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; -} + } -// CountCodeReductionForConstant - Figure out an approximation for how many -// instructions will be constant folded if the specified value is constant. -// -unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { - unsigned Reduction = 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { - // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(*U); - const unsigned NumSucc = TI.getNumSuccessors(); - unsigned Instrs = 0; - for (unsigned I = 0; I != NumSucc; ++I) - Instrs += NumBBInsts[TI.getSuccessor(I)]; - // We don't know which blocks will be eliminated, so use the average size. - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - } else { // Figure out if this instruction will be removed due to simple constant // propagation. Instruction &Inst = cast<Instruction>(*U); @@ -200,33 +62,186 @@ unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { AllOperandsConstant = false; break; } + if (!AllOperandsConstant) + continue; - if (AllOperandsConstant) { - // We will get to remove this instruction... - Reduction += InlineConstants::InstrCost; + // We will get to remove this instruction... + Reduction += InlineConstants::InstrCost; - // And any other instructions that use it which become constants - // themselves. - Reduction += CountCodeReductionForConstant(&Inst); + // And any other instructions that use it which become constants + // themselves. + Worklist.push_back(&Inst); + } + } while (!Worklist.empty()); + return Reduction; +} + +static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics, + ICmpInst *ICI) { + unsigned Reduction = 0; + + // Bail if this is comparing against a non-constant; there is nothing we can + // do there. + if (!isa<Constant>(ICI->getOperand(1))) + return Reduction; + + // An icmp pred (alloca, C) becomes true if the predicate is true when + // equal and false otherwise. 
+ bool Result = ICI->isTrueWhenEqual(); + + SmallVector<Instruction *, 4> Worklist; + Worklist.push_back(ICI); + do { + Instruction *U = Worklist.pop_back_val(); + Reduction += InlineConstants::InstrCost; + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) { + Instruction *I = dyn_cast<Instruction>(*UI); + if (!I || I->mayHaveSideEffects()) continue; + if (I->getNumOperands() == 1) + Worklist.push_back(I); + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { + // If BO produces the same value as U, then the other operand is + // irrelevant and we can put it into the Worklist to continue + // deleting dead instructions. If BO produces the same value as the + // other operand, we can delete BO but that's it. + if (Result == true) { + if (BO->getOpcode() == Instruction::Or) + Worklist.push_back(I); + if (BO->getOpcode() == Instruction::And) + Reduction += InlineConstants::InstrCost; + } else { + if (BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor) + Reduction += InlineConstants::InstrCost; + if (BO->getOpcode() == Instruction::And) + Worklist.push_back(I); + } + } + if (BranchInst *BI = dyn_cast<BranchInst>(I)) { + BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); + if (BB->getSinglePredecessor()) + Reduction + += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB); } } - } + } while (!Worklist.empty()); + return Reduction; } -// CountCodeReductionForAlloca - Figure out an approximation of how much smaller -// the function will be if it is inlined into a context where an argument -// becomes an alloca. -// -unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { +/// \brief Compute the reduction possible for a given instruction if we are able +/// to SROA an alloca. +/// +/// The reduction for this instruction is added to the SROAReduction output +/// parameter. Returns false if this instruction is expected to defeat SROA in +/// general. +static bool countCodeReductionForSROAInst(Instruction *I, + SmallVectorImpl<Value *> &Worklist, + unsigned &SROAReduction) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isSimple()) + return false; + SROAReduction += InlineConstants::InstrCost; + return true; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!SI->isSimple()) + return false; + SROAReduction += InlineConstants::InstrCost; + return true; + } + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (!GEP->hasAllConstantIndices()) + return false; + // A non-zero GEP will likely become a mask operation after SROA. + if (GEP->hasAllZeroIndices()) + SROAReduction += InlineConstants::InstrCost; + Worklist.push_back(GEP); + return true; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + // Track pointer through bitcasts. + Worklist.push_back(BCI); + SROAReduction += InlineConstants::InstrCost; + return true; + } + + // We just look for non-constant operands to ICmp instructions as those will + // defeat SROA. The actual reduction for these happens even without SROA. + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + return isa<Constant>(ICI->getOperand(1)); + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + // SROA can handle a select of alloca iff all uses of the alloca are + // loads, and dereferenceable. We assume it's dereferenceable since + // we're told the input is an alloca. 
+ for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || !LI->isSimple()) + return false; + } + // We don't know whether we'll be deleting the rest of the chain of + // instructions from the SelectInst on, because we don't know whether + // the other side of the select is also an alloca or not. + return true; + } + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // SROA can usually chew through these intrinsics. + SROAReduction += InlineConstants::InstrCost; + return true; + } + } + + // If there is some other strange instruction, we're not going to be + // able to do much if we inline this. + return false; +} + +unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca( + const CodeMetrics &Metrics, Value *V) { if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; + unsigned SROAReduction = 0; + bool CanSROAAlloca = true; - // Looking at ICmpInsts will never abort the analysis and return zero, and - // analyzing them is expensive, so save them for last so that we don't do - // extra work that we end up throwing out. - SmallVector<ICmpInst *, 4> ICmpInsts; + SmallVector<Value *, 4> Worklist; + Worklist.push_back(V); + do { + Value *V = Worklist.pop_back_val(); + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI){ + Instruction *I = cast<Instruction>(*UI); + + if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) + Reduction += countCodeReductionForAllocaICmp(Metrics, ICI); + + if (CanSROAAlloca) + CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist, + SROAReduction); + } + } while (!Worklist.empty()); + + return Reduction + (CanSROAAlloca ? SROAReduction : 0); +} +void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair( + const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs, + Value *V, unsigned ArgIdx) { SmallVector<Value *, 4> Worklist; Worklist.push_back(V); do { @@ -234,126 +249,57 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI){ Instruction *I = cast<Instruction>(*UI); - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!LI->isSimple()) - return 0; - Reduction += InlineConstants::InstrCost; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!SI->isSimple()) - return 0; - Reduction += InlineConstants::InstrCost; - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { // If the GEP has variable indices, we won't be able to do much with it. if (!GEP->hasAllConstantIndices()) - return 0; - // A non-zero GEP will likely become a mask operation after SROA. - if (GEP->hasAllZeroIndices()) - Reduction += InlineConstants::InstrCost; + continue; + // Unless the GEP is in-bounds, some comparisons will be non-constant. + // Fortunately, the real-world cases where this occurs uses in-bounds + // GEPs, and so we restrict the optimization to them here. + if (!GEP->isInBounds()) + continue; + + // Constant indices just change the constant offset. Add the resulting + // value both to our worklist for this argument, and to the set of + // viable paired values with future arguments. 
+ PointerArgs[GEP] = ArgIdx; Worklist.push_back(GEP); - } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { - // Track pointer through bitcasts. - Worklist.push_back(BCI); - Reduction += InlineConstants::InstrCost; - } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { - // SROA can handle a select of alloca iff all uses of the alloca are - // loads, and dereferenceable. We assume it's dereferenceable since - // we're told the input is an alloca. - for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); - UI != UE; ++UI) { - LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || !LI->isSimple()) return 0; - } - // We don't know whether we'll be deleting the rest of the chain of - // instructions from the SelectInst on, because we don't know whether - // the other side of the select is also an alloca or not. continue; - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - default: - return 0; - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - // SROA can usually chew through these intrinsics. - Reduction += InlineConstants::InstrCost; - break; - } - } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { - if (!isa<Constant>(ICI->getOperand(1))) - return 0; - ICmpInsts.push_back(ICI); - } else { - // If there is some other strange instruction, we're not going to be - // able to do much if we inline this. - return 0; } - } - } while (!Worklist.empty()); - - while (!ICmpInsts.empty()) { - ICmpInst *ICI = ICmpInsts.pop_back_val(); - // An icmp pred (alloca, C) becomes true if the predicate is true when - // equal and false otherwise. - bool Result = ICI->isTrueWhenEqual(); - - SmallVector<Instruction *, 4> Worklist; - Worklist.push_back(ICI); - do { - Instruction *U = Worklist.pop_back_val(); - Reduction += InlineConstants::InstrCost; - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) { - Instruction *I = dyn_cast<Instruction>(*UI); - if (!I || I->mayHaveSideEffects()) continue; - if (I->getNumOperands() == 1) - Worklist.push_back(I); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - // If BO produces the same value as U, then the other operand is - // irrelevant and we can put it into the Worklist to continue - // deleting dead instructions. If BO produces the same value as the - // other operand, we can delete BO but that's it. - if (Result == true) { - if (BO->getOpcode() == Instruction::Or) - Worklist.push_back(I); - if (BO->getOpcode() == Instruction::And) - Reduction += InlineConstants::InstrCost; - } else { - if (BO->getOpcode() == Instruction::Or || - BO->getOpcode() == Instruction::Xor) - Reduction += InlineConstants::InstrCost; - if (BO->getOpcode() == Instruction::And) - Worklist.push_back(I); - } - } - if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); - if (BB->getSinglePredecessor()) - Reduction += InlineConstants::InstrCost * NumBBInsts[BB]; - } + // Track pointer through casts. Even when the result is not a pointer, it + // remains a constant relative to constants derived from other constant + // pointers. + if (CastInst *CI = dyn_cast<CastInst>(I)) { + PointerArgs[CI] = ArgIdx; + Worklist.push_back(CI); + continue; } - } while (!Worklist.empty()); - } - return Reduction; -} + // There are two instructions which produce a strict constant value when + // applied to two related pointer values. Ignore everything else. 
+ if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub) + continue; + assert(I->getNumOperands() == 2); + + // Ensure that the two operands are in our set of potentially paired + // pointers (or are derived from them). + Value *OtherArg = I->getOperand(0); + if (OtherArg == V) + OtherArg = I->getOperand(1); + DenseMap<Value *, unsigned>::const_iterator ArgIt + = PointerArgs.find(OtherArg); + if (ArgIt == PointerArgs.end()) + continue; + std::pair<unsigned, unsigned> ArgPair(ArgIt->second, ArgIdx); + if (ArgPair.first > ArgPair.second) + std::swap(ArgPair.first, ArgPair.second); -/// analyzeFunction - Fill in the current structure with information gleaned -/// from the specified function. -void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->hasFnAttr(Attribute::ReturnsTwice); - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); + PointerArgPairWeights[ArgPair] + += countCodeReductionForConstant(Metrics, I); + } + } while (!Worklist.empty()); } /// analyzeFunction - Fill in the current structure with information gleaned @@ -368,12 +314,25 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F, if (Metrics.NumRets==1) --Metrics.NumInsts; - // Check out all of the arguments to the function, figuring out how much - // code can be eliminated if one of the arguments is a constant. ArgumentWeights.reserve(F->arg_size()); - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), - Metrics.CountCodeReductionForAlloca(I))); + DenseMap<Value *, unsigned> PointerArgs; + unsigned ArgIdx = 0; + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; + ++I, ++ArgIdx) { + // Count how much code can be eliminated if one of the arguments is + // a constant or an alloca. + ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I), + countCodeReductionForAlloca(Metrics, I))); + + // If the argument is a pointer, also check for pairs of pointers where + // knowing a fixed offset between them allows simplification. This pattern + // arises mostly due to STL algorithm patterns where pointers are used as + // random access iterators. + if (!I->getType()->isPointerTy()) + continue; + PointerArgs[I] = ArgIdx; + countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx); + } } /// NeverInline - returns true if the function should never be inlined into @@ -382,43 +341,6 @@ bool InlineCostAnalyzer::FunctionInfo::NeverInline() { return (Metrics.exposesReturnsTwice || Metrics.isRecursive || Metrics.containsIndirectBr); } -// getSpecializationBonus - The heuristic used to determine the per-call -// performance boost for using a specialization of Callee with argument -// specializedArgNo replaced by a constant. 
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, - SmallVectorImpl<unsigned> &SpecializedArgNos) -{ - if (Callee->mayBeOverridden()) - return 0; - - int Bonus = 0; - // If this function uses the coldcc calling convention, prefer not to - // specialize it. - if (Callee->getCallingConv() == CallingConv::Cold) - Bonus -= InlineConstants::ColdccPenalty; - - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - unsigned ArgNo = 0; - unsigned i = 0; - for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end(); - I != E; ++I, ++ArgNo) - if (ArgNo == SpecializedArgNos[i]) { - ++i; - Bonus += CountBonusForConstant(I); - } - - // Calls usually take a long time, so they make the specialization gain - // smaller. - Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; - - return Bonus; -} // ConstantFunctionBonus - Figure out how much of a bonus we can get for // possibly devirtualizing a function. We'll subtract the size of the function @@ -522,6 +444,15 @@ int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) { InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; } + const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights + = CalleeFI->PointerArgPairWeights; + for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I + = ArgPairWeights.begin(), E = ArgPairWeights.end(); + I != E; ++I) + if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() == + CS.getArgument(I->first.second)->stripInBoundsConstantOffsets()) + InlineCost -= I->second; + // Each argument passed in has a cost at both the caller and the callee // sides. Measurements show that each argument costs about the same as an // instruction. @@ -589,22 +520,18 @@ int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { // getInlineCost - The heuristic used to determine if we should inline the // function call or not. // -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - SmallPtrSet<const Function*, 16> &NeverInline) { - return getInlineCost(CS, CS.getCalledFunction(), NeverInline); +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) { + return getInlineCost(CS, CS.getCalledFunction()); } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, - Function *Callee, - SmallPtrSet<const Function*, 16> &NeverInline) { +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee) { Instruction *TheCall = CS.getInstruction(); Function *Caller = TheCall->getParent()->getParent(); // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. - if (Callee->mayBeOverridden() || - Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || + if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) return llvm::InlineCost::getNever(); @@ -655,38 +582,6 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, return llvm::InlineCost::get(InlineCost); } -// getSpecializationCost - The heuristic used to determine the code-size -// impact of creating a specialized version of Callee with argument -// SpecializedArgNo replaced by a constant. 
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, - SmallVectorImpl<unsigned> &SpecializedArgNos) -{ - // Don't specialize functions which can be redefined at link-time to mean - // something else. - if (Callee->mayBeOverridden()) - return llvm::InlineCost::getNever(); - - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; - - // If we haven't calculated this information yet, do so now. - if (CalleeFI->Metrics.NumBlocks == 0) - CalleeFI->analyzeFunction(Callee, TD); - - int Cost = 0; - - // Look at the original size of the callee. Each instruction counts as 5. - Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; - - // Offset that with the amount of code that can be constant-folded - // away with the given arguments replaced by constants. - for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(), - ae = SpecializedArgNos.end(); an != ae; ++an) - Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight; - - return llvm::InlineCost::get(Cost); -} - // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a // higher threshold to determine if the function call should be inlined. float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) { diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 370ab96..72e33d1 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/GlobalAlias.h" #include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -26,6 +27,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/ConstantRange.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" @@ -38,21 +40,23 @@ STATISTIC(NumExpand, "Number of expansions"); STATISTIC(NumFactor , "Number of factorizations"); STATISTIC(NumReassoc, "Number of reassociations"); -static Value *SimplifyAndInst(Value *, Value *, const TargetData *, - const TargetLibraryInfo *, const DominatorTree *, - unsigned); -static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *, - const TargetLibraryInfo *, const DominatorTree *, +struct Query { + const TargetData *TD; + const TargetLibraryInfo *TLI; + const DominatorTree *DT; + + Query(const TargetData *td, const TargetLibraryInfo *tli, + const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}; +}; + +static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &, unsigned); -static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *, - const TargetLibraryInfo *, const DominatorTree *, - unsigned); -static Value *SimplifyOrInst(Value *, Value *, const TargetData *, - const TargetLibraryInfo *, const DominatorTree *, - unsigned); -static Value *SimplifyXorInst(Value *, Value *, const TargetData *, - const TargetLibraryInfo *, const DominatorTree *, +static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &, unsigned); +static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned); +static Value *SimplifyTruncInst(Value *, Type *, const Query &, 
unsigned); /// getFalse - For a boolean type, or a vector of boolean type, return false, or /// a vector with every element false, as appropriate for the type. @@ -91,10 +95,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { // Arguments and constants dominate all instructions. return true; + // If we are processing instructions (and/or basic blocks) that have not been + // fully added to a function, the parent nodes may still be null. Simply + // return the conservative answer in these cases. + if (!I->getParent() || !P->getParent() || !I->getParent()->getParent()) + return false; + // If we have a DominatorTree then do a precise test. - if (DT) - return !DT->isReachableFromEntry(P->getParent()) || - !DT->isReachableFromEntry(I->getParent()) || DT->dominates(I, P); + if (DT) { + if (!DT->isReachableFromEntry(P->getParent())) + return true; + if (!DT->isReachableFromEntry(I->getParent())) + return false; + return DT->dominates(I, P); + } // Otherwise, if the instruction is in the entry block, and is not an invoke, // then it obviously dominates all phi nodes. @@ -111,8 +125,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { /// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)". /// Returns the simplified value, or null if no simplification was performed. static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExpand, const TargetData *TD, - const TargetLibraryInfo *TLI, const DominatorTree *DT, + unsigned OpcToExpand, const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. @@ -125,8 +138,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op C) op' (B op C)". Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS; // Do "A op C" and "B op C" both simplify? - if (Value *L = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "A op' B" then "L op' R" is just the LHS. if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand) @@ -135,8 +148,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return LHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -149,8 +161,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, // It does! Try turning it into "(A op B) op' (A op C)". Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1); // Do "A op B" and "A op C" both simplify? - if (Value *L = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) - if (Value *R = SimplifyBinOp(Opcode, A, C, TD, TLI, DT, MaxRecurse)) { + if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) + if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) { // They do! Return "L op' R" if it simplifies or is already available. // If "L op' R" equals "B op' C" then "L op' R" is just the RHS. 
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand) @@ -159,8 +171,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, return RHS; } // Otherwise return "L op' R" if it simplifies. - if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, TLI, DT, - MaxRecurse)) { + if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) { ++NumExpand; return V; } @@ -175,9 +186,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)". /// Returns the simplified value, or null if no simplification was performed. static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, - unsigned OpcToExtract, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, + unsigned OpcToExtract, const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; // Recursion is always used, so bail out at once if we already hit the limit. @@ -202,7 +211,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *DD = A == C ? D : C; // Form "A op' (B op DD)" if it simplifies completely. // Does "B op DD" simplify? - if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) { // It does! Return "A op' V" if it simplifies or is already available. // If V equals B then "A op' V" is just the LHS. If V equals DD then // "A op' V" is just the RHS. @@ -211,8 +220,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == B ? LHS : RHS; } // Otherwise return "A op' V" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, TLI, DT, - MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -226,7 +234,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *CC = B == D ? C : D; // Form "(A op CC) op' B" if it simplifies completely.. // Does "A op CC" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) { // It does! Return "V op' B" if it simplifies or is already available. // If V equals A then "V op' B" is just the LHS. If V equals CC then // "V op' B" is just the RHS. @@ -235,8 +243,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, return V == A ? LHS : RHS; } // Otherwise return "V op' B" if it simplifies. - if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, TLI, DT, - MaxRecurse)) { + if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) { ++NumFactor; return W; } @@ -249,10 +256,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, /// SimplifyAssociativeBinOp - Generic simplifications for associative binary /// operations. Returns the simpler value, or null if none was found. static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc; assert(Instruction::isAssociative(Opcode) && "Not an associative operation!"); @@ -270,12 +274,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "B op C" simplify? 
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) { // It does! Return "A op V" if it simplifies or is already available. // If V equals B then "A op V" is just the LHS. if (V == B) return LHS; // Otherwise return "A op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, A, V, TD, TLI, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -289,12 +293,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "A op B" simplify? - if (Value *V = SimplifyBinOp(Opcode, A, B, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) { // It does! Return "V op C" if it simplifies or is already available. // If V equals B then "V op C" is just the RHS. if (V == B) return RHS; // Otherwise return "V op C" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, C, TD, TLI, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -312,12 +316,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = RHS; // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "V op B" if it simplifies or is already available. // If V equals A then "V op B" is just the LHS. if (V == A) return LHS; // Otherwise return "V op B" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, V, B, TD, TLI, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -331,12 +335,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, Value *C = Op1->getOperand(1); // Does "C op A" simplify? - if (Value *V = SimplifyBinOp(Opcode, C, A, TD, TLI, DT, MaxRecurse)) { + if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) { // It does! Return "B op V" if it simplifies or is already available. // If V equals C then "B op V" is just the RHS. if (V == C) return RHS; // Otherwise return "B op V" if it simplifies. - if (Value *W = SimplifyBinOp(Opcode, B, V, TD, TLI, DT, MaxRecurse)) { + if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) { ++NumReassoc; return W; } @@ -351,10 +355,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, /// evaluating it on both branches of the select results in the same value. /// Returns the common value if so, otherwise returns null. static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. 
if (!MaxRecurse--) return 0; @@ -371,11 +372,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, Value *TV; Value *FV; if (SI == LHS) { - TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, TLI, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, TLI, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse); } else { - TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, TLI, DT, MaxRecurse); - FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, TLI, DT, MaxRecurse); + TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse); + FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse); } // If they simplified to the same value, then return the common value. @@ -426,9 +427,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, /// result in the same value. Returns the common value if so, otherwise returns /// null. static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, - Value *RHS, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, + Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) @@ -447,7 +446,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, TLI, DT, MaxRecurse); + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse); if (TCmp == Cond) { // It not only simplified, it simplified to the select condition. Replace // it with 'true'. @@ -461,7 +460,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } // Does "cmp FV, RHS" simplify? - Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, TLI, DT, MaxRecurse); + Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse); if (FCmp == Cond) { // It not only simplified, it simplified to the select condition. Replace // it with 'false'. @@ -487,19 +486,19 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, TLI, DT, MaxRecurse)) + if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse)) return V; // If the true value simplified to true, then the result of the compare // is equal to "Cond || FCmp". if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, TLI, DT, MaxRecurse)) + if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse)) return V; // Finally, if the false value simplified to true and the true value to // false, then the result of the compare is equal to "!Cond". if (match(FCmp, m_One()) && match(TCmp, m_Zero())) if (Value *V = SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; return 0; @@ -510,10 +509,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, /// it on the incoming phi values yields the same result for every value. If so /// returns the common value, otherwise returns null. 
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -522,13 +518,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, if (isa<PHINode>(LHS)) { PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; } else { assert(isa<PHINode>(RHS) && "No PHI instruction operand!"); PI = cast<PHINode>(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(LHS, PI, DT)) + if (!ValueDominatesPHI(LHS, PI, Q.DT)) return 0; } @@ -539,8 +535,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; Value *V = PI == LHS ? - SimplifyBinOp(Opcode, Incoming, RHS, TD, TLI, DT, MaxRecurse) : - SimplifyBinOp(Opcode, LHS, Incoming, TD, TLI, DT, MaxRecurse); + SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) : + SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -556,10 +552,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, /// incoming phi values yields the same result every time. If so returns the /// common result, otherwise returns null. static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) return 0; @@ -573,7 +566,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, PHINode *PI = cast<PHINode>(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. - if (!ValueDominatesPHI(RHS, PI, DT)) + if (!ValueDominatesPHI(RHS, PI, Q.DT)) return 0; // Evaluate the BinOp on the incoming phi values. @@ -582,7 +575,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, TLI, DT, MaxRecurse); + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) @@ -596,15 +589,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, /// SimplifyAddInst - Given operands for an Add, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; - return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), - Ops, TD, TLI); + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops, + Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -634,17 +624,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// i1 add -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, TLI, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Add over selects and phi nodes is pointless, so don't bother. @@ -662,21 +652,114 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit); + return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); +} + +/// \brief Accumulate the constant integer offset a GEP represents. +/// +/// Given a getelementptr instruction/constantexpr, accumulate the constant +/// offset from the base pointer into the provided APInt 'Offset'. Returns true +/// if the GEP has all-constant indices. Returns false if any non-constant +/// index is encountered leaving the 'Offset' in an undefined state. The +/// 'Offset' APInt must be the bitwidth of the target's pointer size. +static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP, + APInt &Offset) { + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + gep_type_iterator GTI = gep_type_begin(GEP); + for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; + ++I, ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(*I); + if (!OpC) return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD.getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. 
It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +static Constant *stripAndComputeConstantOffsets(const TargetData &TD, + Value *&V) { + if (!V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD.getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet<Value *, 4> Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!accumulateGEPOffset(TD, GEP, Offset)) + break; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD.getIntPtrType(V->getContext()); + return ConstantInt::get(IntPtrTy, Offset); +} + +/// \brief Compute the constant difference between two pointer values. +/// If the difference is not a constant, returns zero. +static Constant *computePointerDifference(const TargetData &TD, + Value *LHS, Value *RHS) { + Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); + if (!LHSOffset) + return 0; + Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); + if (!RHSOffset) + return 0; + + // If LHS and RHS are not related via constant offsets to the same base + // value, there is nothing we can do here. + if (LHS != RHS) + return 0; + + // Otherwise, the difference of LHS - RHS can be computed as: + // LHS - RHS + // = (LHSOffset + Base) - (RHSOffset + Base) + // = LHSOffset - RHSOffset + return ConstantExpr::getSub(LHSOffset, RHSOffset); } /// SimplifySubInst - Given operands for a Sub, see if we can /// fold the result. If not, this returns null. static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(), - Ops, TD, TLI); + Ops, Q.TD, Q.TLI); } // X - undef -> undef @@ -704,19 +787,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *Y = 0, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) // It does! Now see if "X + V" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, TLI, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "Y + V" simplifies. 
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, TLI, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -728,19 +809,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, X = Op0; if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z) // See if "V === X - Y" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) // It does! Now see if "V - Z" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, TLI, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } // See if "V === X - Z" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1)) // It does! Now see if "V - Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, TLI, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; @@ -752,23 +831,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Z = Op0; if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y) // See if "V === Z - X" simplifies. - if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1)) // It does! Now see if "V + Y" simplifies. - if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, TLI, DT, - MaxRecurse-1)) { + if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) { // It does, we successfully reassociated! ++NumReassoc; return W; } + // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies. + if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) && + match(Op1, m_Trunc(m_Value(Y)))) + if (X->getType() == Y->getType()) + // See if "V === X - Y" simplifies. + if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1)) + // It does! Now see if "trunc V" simplifies. + if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1)) + // It does, return the simplified "trunc V". + return W; + + // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). + if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + match(Op1, m_PtrToInt(m_Value(Y)))) + if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); + // Mul distributes over Sub. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // i1 sub -> xor. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyXorInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Threading Sub over selects and phi nodes is pointless, so don't bother. 
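// The SimplifySubInst hunk above adds two folds: "trunc(X) - trunc(Y) ->
// trunc(X - Y)" when everything simplifies, and folding a sub of two
// ptrtoints whose operands differ only by constant GEP offsets.  A worked
// illustration of the second fold (hypothetical IR, not from the patch):
//
//   %a  = getelementptr inbounds i8* %p, i64 12
//   %ia = ptrtoint i8* %a to i64
//   %ip = ptrtoint i8* %p to i64
//   %d  = sub i64 %ia, %ip
//
// stripAndComputeConstantOffsets walks both operands back to the common base
// %p, accumulating offsets 12 and 0, so computePointerDifference returns the
// constant 12 and %d simplifies to i64 12.  A hedged caller-side helper (not
// part of the patch; IA and IP stand for the two ptrtoint values above):
static Value *foldPointerDiff(Value *IA, Value *IP, const TargetData *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
  // Returns the constant byte difference (i64 12 here) or null if the two
  // operands are not constant offsets from one common base pointer.
  return llvm::SimplifySubInst(IA, IP, /*isNSW*/ false, /*isNUW*/ false,
                               TD, TLI, DT);
}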
@@ -784,22 +879,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, - const TargetLibraryInfo *TLI, + const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit); + return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(), - Ops, TD, TLI); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -826,30 +920,30 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, // i1 mul -> and. if (MaxRecurse && Op0->getType()->isIntegerTy(1)) - if (Value *V = SimplifyAndInst(Op0, Op1, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, TLI, DT, + if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // Mul distributes over Add. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, TLI, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, TLI, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q, MaxRecurse)) return V; @@ -859,18 +953,17 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyMulInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can /// fold the result. If not, this returns null. 
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -923,15 +1016,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -939,11 +1030,9 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySDivInst - Given operands for an SDiv, see if we can /// fold the result. If not, this returns null. -static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, TLI, DT, - MaxRecurse)) +static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -952,16 +1041,14 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyUDivInst - Given operands for a UDiv, see if we can /// fold the result. If not, this returns null. 
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, TLI, DT, - MaxRecurse)) +static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -970,12 +1057,11 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyUDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, - const TargetLibraryInfo *, - const DominatorTree *, unsigned) { +static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, + unsigned) { // undef / X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) return Op0; @@ -990,18 +1076,17 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *, Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFDivInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyRem - Given operands for an SRem or URem, see if we can /// fold the result. If not, this returns null. static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -1036,13 +1121,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1050,11 +1135,9 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, /// SimplifySRemInst - Given operands for an SRem, see if we can /// fold the result. If not, this returns null. 
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, TLI, DT, MaxRecurse)) +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1063,16 +1146,14 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifySRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyURemInst - Given operands for a URem, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { - if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, TLI, DT, MaxRecurse)) + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1081,12 +1162,10 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyURemInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } -static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, - const TargetLibraryInfo *, - const DominatorTree *, +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, unsigned) { // undef % X -> undef (the undef could be a snan). if (match(Op0, m_Undef())) @@ -1102,18 +1181,17 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFRemInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, - const TargetData *TD, const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (Constant *C0 = dyn_cast<Constant>(Op0)) { if (Constant *C1 = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { C0, C1 }; - return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD, TLI); + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI); } } @@ -1138,13 +1216,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. 
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1153,10 +1231,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, /// SimplifyShlInst - Given operands for an Shl, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, TLI, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse)) return V; // undef << X -> 0 @@ -1173,17 +1249,15 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, TLI, DT, RecursionLimit); + return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyLShrInst - Given operands for an LShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, TLI, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) return V; // undef >>l X -> 0 @@ -1203,17 +1277,15 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyLShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit); + return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAShrInst - Given operands for an AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { - if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, TLI, DT, MaxRecurse)) + const Query &Q, unsigned MaxRecurse) { + if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) return V; // all ones >>a X -> all ones @@ -1237,20 +1309,19 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAShrInst(Op0, Op1, isExact, TD, TLI, DT, RecursionLimit); + return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. 
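// Among the identities the shift hunks above carry over is
// "all ones >>a X -> all ones".  A quick 8-bit check (illustrative only,
// assuming an in-range shift amount):
//
//   -1        = 0b11111111
//   -1 ashr 3 = 0b11111111   // arithmetic shift copies the sign bit back in
//
// so the result does not depend on the shift amount and SimplifyAShrInst can
// return the all-ones constant without inspecting Op1.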
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, +static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::And, CLHS->getType(), - Ops, TD, TLI); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1292,43 +1363,43 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isPowerOfTwo(Op0, TD, /*OrZero*/true)) + if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true)) return Op0; - if (isPowerOfTwo(Op1, TD, /*OrZero*/true)) + if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true)) return Op1; } // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, TLI, - DT, MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q, + MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, TLI, - DT, MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q, + MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, TLI, DT, + if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q, MaxRecurse)) return V; @@ -1338,19 +1409,18 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyAndInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. 
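// One fold preserved through the SimplifyAndInst rewrite above is
// "A & (-A) = A if A is a power of two or zero", now proved via
// isPowerOfTwo(Op0, Q.TD, /*OrZero*/true).  A quick arithmetic check of why
// it holds (illustrative only):
//
//   A  =  8 = 0b00001000
//   -A = -8 = 0b11111000   // two's complement
//   A & -A  = 0b00001000 = A
//
// Negating a power of two keeps its single set bit and sets every bit above
// it, so the AND returns A; for A == 0 both sides are zero.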
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(), - Ops, TD, TLI); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1400,32 +1470,31 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. - if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, TLI, - DT, MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q, + MaxRecurse)) return V; // Or distributes over And. Try some generic simplifications based on this. - if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, TD, - TLI, DT, MaxRecurse)) + if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q, + MaxRecurse)) return V; // And distributes over Or. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // If the operation is with the result of a select instruction, check whether // operating on either branch of the select always yields the same value. if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) - if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, TLI, DT, + if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) - if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, TLI, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; return 0; @@ -1434,19 +1503,18 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyOrInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyXorInst - Given operands for a Xor, see if we can /// fold the result. If not, this returns null. -static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, unsigned MaxRecurse) { +static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, + unsigned MaxRecurse) { if (Constant *CLHS = dyn_cast<Constant>(Op0)) { if (Constant *CRHS = dyn_cast<Constant>(Op1)) { Constant *Ops[] = { CLHS, CRHS }; return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(), - Ops, TD, TLI); + Ops, Q.TD, Q.TLI); } // Canonicalize the constant to the RHS. @@ -1471,13 +1539,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, return Constant::getAllOnesValue(Op0->getType()); // Try some generic simplifications for associative operations. 
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, TLI, - DT, MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q, + MaxRecurse)) return V; // And distributes over Xor. Try some generic simplifications based on this. if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And, - TD, TLI, DT, MaxRecurse)) + Q, MaxRecurse)) return V; // Threading Xor over selects and phi nodes is pointless, so don't bother. @@ -1495,7 +1563,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyXorInst(Op0, Op1, TD, TLI, DT, RecursionLimit); + return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit); } static Type *GetCompareTy(Value *Op) { @@ -1522,42 +1590,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -/// stripPointerAdjustments - This is like Value::stripPointerCasts, but also -/// removes inbounds gep operations, regardless of their indices. -static Value *stripPointerAdjustmentsImpl(Value *V, - SmallPtrSet<GEPOperator*, 8> &VisitedGEPs) { - GEPOperator *GEP = dyn_cast<GEPOperator>(V); - if (GEP == 0 || !GEP->isInBounds()) - return V; - - // If we've already seen this GEP, we will end up infinitely looping. This - // can happen in unreachable code. - if (!VisitedGEPs.insert(GEP)) - return V; - - return stripPointerAdjustmentsImpl(GEP->getOperand(0)->stripPointerCasts(), - VisitedGEPs); -} - -static Value *stripPointerAdjustments(Value *V) { - SmallPtrSet<GEPOperator*, 8> VisitedGEPs; - return stripPointerAdjustmentsImpl(V, VisitedGEPs); -} - /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -1625,29 +1668,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Be more aggressive about stripping pointer adjustments when checking a // comparison of an alloca address to another object. We can rip off all // inbounds GEP operations, even if they are variable. - LHSPtr = stripPointerAdjustments(LHSPtr); + LHSPtr = LHSPtr->stripInBoundsOffsets(); if (llvm::isIdentifiedObject(LHSPtr)) { - RHSPtr = stripPointerAdjustments(RHSPtr); + RHSPtr = RHSPtr->stripInBoundsOffsets(); if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) { // If both sides are different identified objects, they aren't equal // unless they're null. 
- if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr)) - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); // A local identified object (alloca or noalias call) can't equal any // incoming argument, unless they're both null. - if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr)) - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); } // Assume that the constant null is on the right. - if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } } else if (isa<Argument>(LHSPtr)) { - RHSPtr = stripPointerAdjustments(RHSPtr); + RHSPtr = RHSPtr->stripInBoundsOffsets(); // An alloca can't be equal to an argument. - if (isa<AllocaInst>(RHSPtr)) - return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); + if (isa<AllocaInst>(RHSPtr)) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } } // If we are comparing with zero then try hard since this is a common case. @@ -1661,40 +1714,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: - if (isKnownNonZero(LHS, TD)) + if (isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; case ICmpInst::ICMP_SLT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); if (LHSKnownNonNegative) return getFalse(ITy); break; case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getTrue(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getFalse(ITy); break; case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); if (LHSKnownNonNegative) return getTrue(ITy); break; case ICmpInst::ICMP_SGT: - ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD); if (LHSKnownNegative) return getFalse(ITy); - if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) + if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD)) return getTrue(ITy); break; } @@ -1777,19 +1830,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. 
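// The pointer-comparison hunks above deliberately narrow several folds from
// "whatever CmpInst::isFalseWhenEqual(Pred) says" to explicit ICMP_EQ /
// ICMP_NE handling.  The motivation, as read from the hunks (illustrative
// example, not text from the patch): knowing that two pointers are distinct
// identified objects answers equality questions only, e.g.
//
//   %a = alloca i32
//   %c = icmp ult i32* %a, %arg
//
// may not be folded just because %a and %arg are different objects, whereas
// the equality forms are still safe:
//
//   icmp eq i32* %a, %arg   ; -> false
//   icmp ne i32* %a, %arg   ; -> true (where the hunks handle NE)
//
// The old isFalseWhenEqual formulation would have handed the relational
// forms a definite answer as well, which distinctness alone does not justify.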
- if (MaxRecurse && TD && isa<PtrToIntInst>(LI) && - TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) && + Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast<Constant>(RHS)) { // Transfer the cast to the constant. if (Value *V = SimplifyICmpInst(Pred, SrcOp, ConstantExpr::getIntToPtr(RHSC, SrcTy), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) { if (RI->getOperand(0)->getType() == SrcTy) // Compare without the cast. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } } @@ -1801,7 +1854,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that signed predicates become unsigned. if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, RI->getOperand(0), TD, TLI, DT, + SrcOp, RI->getOperand(0), Q, MaxRecurse-1)) return V; } @@ -1817,7 +1870,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // also a case of comparing two zero-extended values. if (RExt == CI && MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred), - SrcOp, Trunc, TD, TLI, DT, MaxRecurse-1)) + SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit @@ -1861,7 +1914,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse && SrcTy == RI->getOperand(0)->getType()) // Compare X and Y. Note that the predicate does not change. if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; } // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended @@ -1875,8 +1928,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the re-extended constant didn't change then this is effectively // also a case of comparing two sign-extended values. if (RExt == CI && MaxRecurse) - if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, TLI, DT, - MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1)) return V; // Otherwise the upper bits of LHS are all equal, while RHS has varying @@ -1910,7 +1962,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp, Constant::getNullValue(SrcTy), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; case ICmpInst::ICMP_ULT: @@ -1919,7 +1971,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (MaxRecurse) if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp, Constant::getNullValue(SrcTy), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; break; } @@ -1953,14 +2005,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if ((A == RHS || B == RHS) && NoLHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A, Constant::getNullValue(RHS->getType()), - TD, TLI, DT, MaxRecurse-1)) + Q, MaxRecurse-1)) return V; // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow. if ((C == LHS || D == LHS) && NoRHSWrapProblem) if (Value *V = SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()), - C == LHS ? 
D : C, TD, TLI, DT, MaxRecurse-1)) + C == LHS ? D : C, Q, MaxRecurse-1)) return V; // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow. @@ -1969,7 +2021,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Determine Y and Z in the form icmp (X+Y), (X+Z). Value *Y = (A == C || A == D) ? B : A; Value *Z = (C == A || C == B) ? D : C; - if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1)) return V; } } @@ -1981,7 +2033,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -1991,7 +2043,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2008,7 +2060,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2018,7 +2070,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2052,7 +2104,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; case Instruction::Shl: { @@ -2063,7 +2115,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (!NSW && ICmpInst::isSigned(Pred)) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), - RBO->getOperand(0), TD, TLI, DT, MaxRecurse-1)) + RBO->getOperand(0), Q, MaxRecurse-1)) return V; break; } @@ -2117,7 +2169,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2131,7 +2183,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2187,7 +2239,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A EqP B" simplifies. 
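// Among the compare folds threaded through this hunk is
// "icmp (X+Y), (X+Z) -> icmp Y, Z", valid for equality predicates or when
// neither add can wrap (the NoLHSWrapProblem / NoRHSWrapProblem guards
// above).  A small numeric sanity check (illustrative only):
//
//   X = 100, Y = 3, Z = 7, no overflow:
//   icmp slt (X+Y), (X+Z)  =  icmp slt 103, 107  =  true
//   icmp slt Y, Z          =  icmp slt 3, 7      =  true
//
// With a wrapping add the shared X could flip the ordering, which is why the
// recursion into SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1) is gated on
// those no-wrap facts for non-equality predicates.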
if (MaxRecurse) - if (Value *V = SimplifyICmpInst(EqP, A, B, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1)) return V; break; case CmpInst::ICMP_NE: @@ -2201,7 +2253,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return V; // Otherwise, see if "A InvEqP B" simplifies. if (MaxRecurse) - if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, TLI, DT, MaxRecurse-1)) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1)) return V; break; } @@ -2283,13 +2335,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; @@ -2299,22 +2351,20 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); + return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); if (Constant *CLHS = dyn_cast<Constant>(LHS)) { if (Constant *CRHS = dyn_cast<Constant>(RHS)) - return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD, TLI); + return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI); // If we have a constant, make sure it is on the RHS. std::swap(LHS, RHS); @@ -2382,13 +2432,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse)) return V; // If the comparison is with the result of a phi instruction, check whether // doing the compare with each incoming phi value yields a common result. 
if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, TLI, DT, MaxRecurse)) + if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; return 0; @@ -2398,13 +2448,15 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold /// the result. If not, this returns null. -Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, - const TargetData *TD, const DominatorTree *) { +static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, + Value *FalseVal, const Query &Q, + unsigned MaxRecurse) { // select true, X, Y -> X // select false, X, Y -> Y if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal)) @@ -2427,10 +2479,17 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal, return 0; } +Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, - const DominatorTree *) { +static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) { // The type of the GEP pointer operand. PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType()); // The GEP pointer operand is not a pointer, it's a vector of pointers. @@ -2454,9 +2513,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, if (C->isZero()) return Ops[0]; // getelementptr P, N -> P if P points to a type of zero size. - if (TD) { + if (Q.TD) { Type *Ty = PtrTy->getElementType(); - if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0) + if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0) return Ops[0]; } } @@ -2469,12 +2528,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1)); } +Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit); +} + /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we /// can fold the result. If not, this returns null. 
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef<unsigned> Idxs, - const TargetData *, - const DominatorTree *) { +static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, const Query &Q, + unsigned) { if (Constant *CAgg = dyn_cast<Constant>(Agg)) if (Constant *CVal = dyn_cast<Constant>(Val)) return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs); @@ -2499,8 +2563,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, return 0; } +Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, + ArrayRef<unsigned> Idxs, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. -static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { +static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = 0; @@ -2528,81 +2601,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; return CommonValue; } +static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { + if (Constant *C = dyn_cast<Constant>(Op)) + return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI); + + return 0; +} + +Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit); +} + //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can /// fold the result. If not, this returns null. 
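// SimplifyTruncInst, introduced just above, only constant-folds for now, but
// it is what finishes the "trunc(X) - trunc(Y) -> trunc(X - Y)" fold added to
// SimplifySubInst earlier in this patch.  A hedged caller-side sketch using
// the public wrapper whose signature appears in this hunk (the helper below
// is hypothetical, not part of the patch, and assumes the usual headers for
// TruncInst are already included):
static bool replaceIfTruncFolds(TruncInst *TI, const TargetData *TD,
                                const TargetLibraryInfo *TLI,
                                const DominatorTree *DT) {
  if (Value *V = llvm::SimplifyTruncInst(TI->getOperand(0), TI->getType(),
                                         TD, TLI, DT)) {
    TI->replaceAllUsesWith(V);  // TI is now dead; callers may erase it.
    return true;
  }
  return false;
}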
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { switch (Opcode) { case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, TLI, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, TLI, DT, MaxRecurse); - case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); + Q, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, - TD, TLI, DT, MaxRecurse); + Q, MaxRecurse); case Instruction::LShr: - return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT, - MaxRecurse); + return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); case Instruction::AShr: - return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, TLI, DT, - MaxRecurse); - case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, TLI, DT, - MaxRecurse); - case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, TLI, DT, - MaxRecurse); + return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse); + case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse); + case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse); + case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse); default: if (Constant *CLHS = dyn_cast<Constant>(LHS)) if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Constant *COps[] = {CLHS, CRHS}; - return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD, TLI); + return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD, + Q.TLI); } // If the operation is associative, try some generic simplifications. if (Instruction::isAssociative(Opcode)) - if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, TLI, DT, - MaxRecurse)) + if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - // If the operation is with the result of a select instruction, check whether + // If the operation is with the result of a select instruction check whether // operating on either branch of the select always yields the same value. 
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) - if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, TLI, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse)) return V; // If the operation is with the result of a phi instruction, check whether // operating on all incoming values of the phi always yields the same value. if (isa<PHINode>(LHS) || isa<PHINode>(RHS)) - if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, TLI, DT, - MaxRecurse)) + if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; return 0; @@ -2612,28 +2681,26 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyBinOp(Opcode, LHS, RHS, TD, TLI, DT, RecursionLimit); + return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit); } /// SimplifyCmpInst - Given operands for a CmpInst, see if we can /// fold the result. static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const TargetData *TD, - const TargetLibraryInfo *TLI, - const DominatorTree *DT, - unsigned MaxRecurse) { + const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) - return SimplifyICmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, TD, TLI, DT, MaxRecurse); + return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const TargetData *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, TLI, DT, RecursionLimit); + return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT), + RecursionLimit); } -static Value *SimplifyCallInst(CallInst *CI) { +static Value *SimplifyCallInst(CallInst *CI, const Query &) { // call undef -> undef if (isa<UndefValue>(CI->getCalledValue())) return UndefValue::get(CI->getType()); @@ -2720,25 +2787,28 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), - I->getOperand(2), TD, DT); + I->getOperand(2), TD, TLI, DT); break; case Instruction::GetElementPtr: { SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end()); - Result = SimplifyGEPInst(Ops, TD, DT); + Result = SimplifyGEPInst(Ops, TD, TLI, DT); break; } case Instruction::InsertValue: { InsertValueInst *IV = cast<InsertValueInst>(I); Result = SimplifyInsertValueInst(IV->getAggregateOperand(), IV->getInsertedValueOperand(), - IV->getIndices(), TD, DT); + IV->getIndices(), TD, TLI, DT); break; } case Instruction::PHI: - Result = SimplifyPHINode(cast<PHINode>(I), DT); + Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT)); break; case Instruction::Call: - Result = SimplifyCallInst(cast<CallInst>(I)); + Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT)); + break; + case Instruction::Trunc: + Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 279d6a9..5ca2746 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -866,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom, // BBFrom to BBTo. 
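
Note: the switch-related hunks here and below (LazyValueInfo, SparsePropagation, BitcodeWriter) all move from index-based case access to an iterator that exposes each case's value and successor together. A rough stand-in sketch of that shape, mirroring the edge-counting loop that follows — SwitchCases is illustrative, not the real llvm::SwitchInst API:

#include <cstdio>
#include <utility>
#include <vector>

struct Case { int Value; int SuccessorBlock; };

class SwitchCases {
  std::vector<Case> Cases;
public:
  explicit SwitchCases(std::vector<Case> C) : Cases(std::move(C)) {}
  using CaseIt = std::vector<Case>::const_iterator;
  CaseIt case_begin() const { return Cases.begin(); }
  CaseIt case_end() const { return Cases.end(); }
};

int main() {
  SwitchCases SI({{0, 2}, {1, 3}, {7, 2}});
  int TargetBlock = 2, NumEdges = 0;
  for (SwitchCases::CaseIt I = SI.case_begin(), E = SI.case_end(); I != E; ++I) {
    if (I->SuccessorBlock != TargetBlock)
      continue;              // only count edges into the block we care about
    ++NumEdges;
  }
  std::printf("edges into block %d: %d\n", TargetBlock, NumEdges);  // prints 2
  return 0;
}
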
unsigned NumEdges = 0; ConstantInt *EdgeVal = 0; - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) { - if (SI->getCaseSuccessor(i) != BBTo) continue; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + if (i.getCaseSuccessor() != BBTo) continue; if (NumEdges++) break; - EdgeVal = SI->getCaseValue(i); + EdgeVal = i.getCaseValue(); } assert(EdgeVal && "Missing successor?"); if (NumEdges == 1) { diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0e6bcbf..873a275 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Operator.h" using namespace llvm; @@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// MaxInstsToScan specifies the maximum instructions to scan in the block. If /// it is set to 0, it will scan the whole block. You can also optionally /// specify an alias analysis implementation, which makes this more precise. +/// +/// If TBAATag is non-null and a load or store is found, the TBAA tag from the +/// load or store is recorded there. If there is no TBAA tag or if no access +/// is found, it is left unmodified. Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, - AliasAnalysis *AA) { + AliasAnalysis *AA, + MDNode **TBAATag) { if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; // If we're using alias analysis to disambiguate get the size of *Ptr. @@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // (This is true even if the load is volatile or atomic, although // those cases are unlikely.) if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) { + if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); return LI; + } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If this is a store through Ptr, the value is available! // (This is true even if the store is volatile or atomic, although // those cases are unlikely.) - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) { + if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa); return SI->getOperand(0); + } // If Ptr is an alloca and this is a store to a different alloca, ignore // the store. This is a trivial form of alias analysis that is important diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index ca06300..38cb1c9 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -227,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEP; // Simplify the GEP to handle 'gep x, 0' -> x etc. 
- if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) { + if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) { for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) RemoveInstInputs(GEPOps[i], InstInputs); diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 0c7d05f..c819666 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -194,9 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, Succs.assign(TI.getNumSuccessors(), true); return; } - - unsigned CCase = SI.findCaseValue(cast<ConstantInt>(C)); - Succs[SI.resolveSuccessorIndex(CCase)] = true; + SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C)); + Succs[Case.getSuccessorIndex()] = true; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index b5811f2..01e00ca 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -41,6 +41,160 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { return TD ? TD->getPointerSizeInBits() : 0; } +static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + if (!Add) { + if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (!CLHS->getValue().isNegative()) { + unsigned BitWidth = Mask.getBitWidth(); + unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + llvm::ComputeMaskedBits(Op1, MaskV, KnownZero2, KnownOne2, TD, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + + unsigned BitWidth = Mask.getBitWidth(); + + // If one of the operands has trailing zeros, then the bits that the + // other operand has in those bit positions will be preserved in the + // result. For an add, this works with either operand. For a subtract, + // this only works if the known zeros are in the right operand. + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, + BitWidth - Mask.countLeadingZeros()); + llvm::ComputeMaskedBits(Op0, Mask2, LHSKnownZero, LHSKnownOne, TD, Depth+1); + assert((LHSKnownZero & LHSKnownOne) == 0 && + "Bits known to be one AND zero?"); + unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); + + llvm::ComputeMaskedBits(Op1, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); + + // Determine which operand has more trailing zeros, and use that + // many bits from the other operand. 
+ if (LHSKnownZeroOut > RHSKnownZeroOut) { + if (Add) { + APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + } else { + // If the known zeros are in the left operand for a subtract, + // fall back to the minimum known zeros in both operands. + KnownZero |= APInt::getLowBitsSet(BitWidth, + std::min(LHSKnownZeroOut, + RHSKnownZeroOut)); + } + } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { + APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); + KnownZero |= LHSKnownZero & Mask; + KnownOne |= LHSKnownOne & Mask; + } + + // Are we still trying to solve for the sign bit? + if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()) { + if (NSW) { + if (Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } +} + +static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, + const APInt &Mask, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const TargetData *TD, unsigned Depth) { + unsigned BitWidth = Mask.getBitWidth(); + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. + if (Mask.isNegative() && NSW) { + if (Op0 == Op1) { + // The product of a number with itself is non-negative. + isKnownNonNegative = true; + } else { + bool isKnownNonNegativeOp1 = KnownZero.isNegative(); + bool isKnownNonNegativeOp0 = KnownZero2.isNegative(); + bool isKnownNegativeOp1 = KnownOne.isNegative(); + bool isKnownNegativeOp0 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) || + (isKnownNonNegativeOp1 && isKnownNonNegativeOp0); + // The product of a negative number and a non-negative number is either + // negative or zero. + if (!isKnownNonNegative) + isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 && + isKnownNonZero(Op0, TD, Depth)) || + (isKnownNegativeOp0 && isKnownNonNegativeOp1 && + isKnownNonZero(Op1, TD, Depth)); + } + } + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. 
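
The comment above leans on a simple arithmetic fact: a product has at least as many trailing zero bits as its two factors combined (capped at the bit width), which is exactly what matters for alignment. A small plain-C++ illustration of that fact, not the APInt-based code from the patch:

#include <cstdint>
#include <cstdio>

static unsigned countTrailingZeros(uint32_t V) {
  if (V == 0) return 32;
  unsigned N = 0;
  while ((V & 1) == 0) { V >>= 1; ++N; }
  return N;
}

int main() {
  uint32_t A = 12;     // 0b1100, 2 trailing zeros
  uint32_t B = 40;     // 0b101000, 3 trailing zeros
  uint32_t P = A * B;  // 480 = 0b111100000, at least 2 + 3 trailing zeros
  std::printf("ctz(A)=%u ctz(B)=%u ctz(A*B)=%u\n",
              countTrailingZeros(A), countTrailingZeros(B),
              countTrailingZeros(P));  // 2 3 5
  return 0;
}
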
+ KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + + // Only make use of no-wrap flags if we failed to compute the sign bit + // directly. This matters if the multiplication always overflows, in + // which case we prefer to follow the result of the direct computation, + // though as the program is invoking undefined behaviour we can choose + // whatever we like here. + if (isKnownNonNegative && !KnownOne.isNegative()) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative && !KnownZero.isNegative()) + KnownOne.setBit(BitWidth - 1); +} + /// ComputeMaskedBits - Determine which of the bits specified in Mask are /// known to be either zero or one and return them in the KnownZero/KnownOne /// bit sets. This code only analyzes bits in Mask, in order to short-circuit @@ -106,16 +260,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { unsigned Align = GV->getAlignment(); - if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) { + if (Align == 0 && TD) { if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { Type *ObjectType = GVar->getType()->getElementType(); - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) - Align = TD->getPreferredAlignment(GVar); - else - Align = TD->getABITypeAlignment(ObjectType); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + Align = TD->getPreferredAlignment(GVar); + else + Align = TD->getABITypeAlignment(ObjectType); + } } } if (Align > 0) @@ -203,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Mul: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - bool isKnownNegative = false; - bool isKnownNonNegative = false; - // If the multiplication is known not to overflow, compute the sign bit. - if (Mask.isNegative() && - cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) { - Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0); - if (Op1 == Op2) { - // The product of a number with itself is non-negative. - isKnownNonNegative = true; - } else { - bool isKnownNonNegative1 = KnownZero.isNegative(); - bool isKnownNonNegative2 = KnownZero2.isNegative(); - bool isKnownNegative1 = KnownOne.isNegative(); - bool isKnownNegative2 = KnownOne2.isNegative(); - // The product of two numbers with the same sign is non-negative. 
- isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) || - (isKnownNonNegative1 && isKnownNonNegative2); - // The product of a negative number and a non-negative number is either - // negative or zero. - if (!isKnownNonNegative) - isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 && - isKnownNonZero(Op2, TD, Depth)) || - (isKnownNegative2 && isKnownNonNegative1 && - isKnownNonZero(Op1, TD, Depth)); - } - } - - // If low bits are zero in either operand, output low known-0 bits. - // Also compute a conserative estimate for high known-0 bits. - // More trickiness is possible, but this is sufficient for the - // interesting case of alignment computation. - KnownOne.clearAllBits(); - unsigned TrailZ = KnownZero.countTrailingOnes() + - KnownZero2.countTrailingOnes(); - unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + - KnownZero2.countLeadingOnes(), - BitWidth) - BitWidth; - - TrailZ = std::min(TrailZ, BitWidth); - LeadZ = std::min(LeadZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | - APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; - - // Only make use of no-wrap flags if we failed to compute the sign bit - // directly. This matters if the multiplication always overflows, in - // which case we prefer to follow the result of the direct computation, - // though as the program is invoking undefined behaviour we can choose - // whatever we like here. - if (isKnownNonNegative && !KnownOne.isNegative()) - KnownZero.setBit(BitWidth - 1); - else if (isKnownNegative && !KnownZero.isNegative()) - KnownOne.setBit(BitWidth - 1); - - return; + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, + TD, Depth); + break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively @@ -422,91 +521,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } break; case Instruction::Sub: { - if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) { - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. - if (!CLHS->getValue().isNegative()) { - unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, - TD, Depth+1); - - // If all of the MaskV bits are known to be zero, then we know the - // output top bits are zero, because we now know that the output is - // from [0-C]. - if ((KnownZero2 & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - } - } - } + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, + TD, Depth); + break; } - // fall through case Instruction::Add: { - // If one of the operands has trailing zeros, then the bits that the - // other operand has in those bit positions will be preserved in the - // result. For an add, this works with either operand. For a subtract, - // this only works if the known zeros are in the right operand. 
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, - Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); - unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - - ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); - - // Determine which operand has more trailing zeros, and use that - // many bits from the other operand. - if (LHSKnownZeroOut > RHSKnownZeroOut) { - if (I->getOpcode() == Instruction::Add) { - APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut); - KnownZero |= KnownZero2 & Mask; - KnownOne |= KnownOne2 & Mask; - } else { - // If the known zeros are in the left operand for a subtract, - // fall back to the minimum known zeros in both operands. - KnownZero |= APInt::getLowBitsSet(BitWidth, - std::min(LHSKnownZeroOut, - RHSKnownZeroOut)); - } - } else if (RHSKnownZeroOut >= LHSKnownZeroOut) { - APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut); - KnownZero |= LHSKnownZero & Mask; - KnownOne |= LHSKnownOne & Mask; - } - - // Are we still trying to solve for the sign bit? - if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ - OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I); - if (OBO->hasNoSignedWrap()) { - if (I->getOpcode() == Instruction::Add) { - // Adding two positive numbers can't wrap into negative - if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // and adding two negative numbers can't wrap into positive. - else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } else { - // Subtracting a negative number from a positive one can't wrap - if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) - KnownZero |= APInt::getSignBit(BitWidth); - // neither can subtracting a positive number from a negative one. - else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) - KnownOne |= APInt::getSignBit(BitWidth); - } - } - } - - return; + bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); + ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, + TD, Depth); + break; } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -691,8 +717,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (P->hasConstantValue() == P) break; - KnownZero = APInt::getAllOnesValue(BitWidth); - KnownOne = APInt::getAllOnesValue(BitWidth); + KnownZero = Mask; + KnownOne = Mask; for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { // Skip direct self references. if (P->getIncomingValue(i) == P) continue; @@ -723,21 +749,51 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // If this call is undefined for 0, the result will be less than 2^n. 
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = APInt::getHighBitsSet(64, 32); + KnownZero = Mask & APInt::getHighBitsSet(64, 32); break; } } break; + case Instruction::ExtractValue: + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) { + ExtractValueInst *EVI = cast<ExtractValueInst>(I); + if (EVI->getNumIndices() != 1) break; + if (EVI->getIndices()[0] == 0) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + ComputeMaskedBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, Mask, + KnownZero, KnownOne, KnownZero2, KnownOne2, + TD, Depth); + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + ComputeMaskedBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, Mask, + KnownZero, KnownOne, KnownZero2, KnownOne2, + TD, Depth); + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, Mask, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); + break; + } + } + } } } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9376990..b25d2e9 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1142,9 +1142,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); Vals.push_back(VE.getValueID(SI.getCondition())); Vals.push_back(VE.getValueID(SI.getDefaultDest())); - for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) { - Vals.push_back(VE.getValueID(SI.getCaseValue(i))); - Vals.push_back(VE.getValueID(SI.getCaseSuccessor(i))); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + Vals.push_back(VE.getValueID(i.getCaseValue())); + Vals.push_back(VE.getValueID(i.getCaseSuccessor())); } } break; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d5926f9..dd3fb3b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1864,13 +1864,12 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI, static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AsmPrinter &AP) { - if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) { - uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); + const TargetData *TD = AP.TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(CV->getType()); + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) return AP.OutStreamer.EmitZeros(Size, AddrSpace); - } if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); switch (Size) { case 1: case 2: @@ -1891,7 +1890,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return EmitGlobalConstantFP(CFP, AddrSpace, AP); if (isa<ConstantPointerNull>(CV)) { 
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType()); AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } @@ -1905,20 +1903,28 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); - // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of - // vectors). - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). if (CE->getOpcode() == Instruction::BitCast) return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + return EmitGlobalConstantImpl(New, AddrSpace, AP); + } + } if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. - AP.OutStreamer.EmitValue(LowerConstant(CV, AP), - AP.TM.getTargetData()->getTypeAllocSize(CV->getType()), - AddrSpace); + AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. @@ -2102,27 +2108,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { OutStreamer.EmitLabel(Syms[i]); } + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB->getBasicBlock()) + if (BB->hasName()) + OutStreamer.AddComment("%" + BB->getName()); + EmitBasicBlockLoopComments(*MBB, LI, *this); + } + // Print the main label for the block. if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (isVerbose() && OutStreamer.hasRawTextSupport()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - - EmitBasicBlockLoopComments(*MBB, LI, *this); - // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); } } else { - if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) - if (BB->hasName()) - OutStreamer.AddComment("%" + BB->getName()); - EmitBasicBlockLoopComments(*MBB, LI, *this); - } - OutStreamer.EmitLabel(MBB->getSymbol()); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 51c635e..3b383f6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -156,13 +156,12 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. if (!SP.Verify()) return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; + // If the line number is 0, don't add it. 
unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) + if (Line == 0) return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); assert(FileID && "Invalid file id"); @@ -178,7 +177,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { return; unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) + if (Line == 0) return; unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); @@ -870,11 +869,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (CTy.isAppleBlockExtension()) addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - DICompositeType ContainingType = CTy.getContainingType(); if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, @@ -922,6 +916,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add source line info if available. if (!CTy.isForwardDecl()) addSourceLine(&Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); } } @@ -1006,6 +1006,9 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); + // Unfortunately this code needs to stay here to work around + // a bug in older gdbs that requires the linkage name to resolve + // multiple template functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index fa62169..388cef4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -523,20 +523,19 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, DirName = ""; unsigned SrcId = SourceIdMap.size()+1; - std::pair<std::string, std::string> SourceName = - std::make_pair(FileName, DirName); - std::pair<std::pair<std::string, std::string>, unsigned> Entry = - make_pair(SourceName, SrcId); - std::map<std::pair<std::string, std::string>, unsigned>::iterator I; - bool NewlyInserted; - llvm::tie(I, NewlyInserted) = SourceIdMap.insert(Entry); - if (!NewlyInserted) - return I->second; + // We look up the file/dir pair by concatenating them with a zero byte. + SmallString<128> NamePair; + NamePair += DirName; + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += FileName; + + StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); + if (Ent.getValue() != SrcId) + return Ent.getValue(); // Print out a .file directive to specify files for .loc directives. 
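
Aside: the GetOrCreateSourceID hunk above keys a single string map by directory and filename joined with an embedded NUL — a byte that cannot occur in a path — so distinct (dir, file) pairs can never collide the way plain concatenation could. A standard-library sketch of the same trick; std::string and std::unordered_map stand in for SmallString and StringMap, and the function name is illustrative:

#include <cstdio>
#include <string>
#include <unordered_map>

static unsigned getOrCreateSourceID(std::unordered_map<std::string, unsigned> &Map,
                                    const std::string &Dir,
                                    const std::string &File) {
  std::string Key = Dir;
  Key.push_back('\0');     // separator that cannot appear in either component
  Key += File;
  auto It = Map.emplace(Key, static_cast<unsigned>(Map.size() + 1));
  return It.first->second; // existing id, or the freshly assigned one
}

int main() {
  std::unordered_map<std::string, unsigned> Ids;
  std::printf("%u\n", getOrCreateSourceID(Ids, "lib/CodeGen", "Foo.cpp"));  // 1
  std::printf("%u\n", getOrCreateSourceID(Ids, "lib", "CodeGen/Foo.cpp"));  // 2, distinct pair
  std::printf("%u\n", getOrCreateSourceID(Ids, "lib/CodeGen", "Foo.cpp"));  // 1 again
  return 0;
}
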
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.first.second, - Entry.first.first); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); return SrcId; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 8b802d2..83f30f5 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -26,7 +26,6 @@ #include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" -#include <map> namespace llvm { @@ -209,9 +208,9 @@ class DwarfDebug { /// std::vector<DIEAbbrev *> Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of source filename and directory - /// mapped to a unique id. - std::map<std::pair<std::string, std::string>, unsigned> SourceIdMap; + /// SourceIdMap - Source id map, i.e. pair of source filename and directory, + /// separated by a zero byte, mapped to a unique id. + StringMap<unsigned> SourceIdMap; /// StringPool - A String->Symbol mapping of strings used by indirect /// references. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 59c92b3..f57f4a8 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1019,12 +1019,27 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, return MBB2I->isCall() && !MBB1I->isCall(); } +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. Always use the DebugLoc of the first +/// branching instruction found unless its absent, in which case use the +/// DebugLoc of the second if present. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return DebugLoc(); + --I; + while (I->isDebugValue() && I != MBB.begin()) + --I; + if (I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); +} + /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); - DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1073,6 +1088,7 @@ ReoptimizeBlock: // destination, remove the branch, replacing it with an unconditional one or // a fall-through. if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) @@ -1130,6 +1146,7 @@ ReoptimizeBlock: // If the prior block branches somewhere else on the condition and here if // the condition is false, remove the uncond second branch. 
if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; @@ -1143,6 +1160,7 @@ ReoptimizeBlock: if (PriorTBB == MBB) { SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; @@ -1180,6 +1198,7 @@ ReoptimizeBlock: DEBUG(dbgs() << "\nMoving MBB: " << *MBB << "To make fallthrough to: " << *PriorTBB << "\n"); + DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); @@ -1209,6 +1228,7 @@ ReoptimizeBlock: if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { SmallVector<MachineOperand, 4> NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); TII->RemoveBranch(*MBB); TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; @@ -1222,6 +1242,7 @@ ReoptimizeBlock: if (CurTBB && CurCond.empty() && CurFBB == 0 && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { + DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and // then seeing if the block is empty. @@ -1264,8 +1285,9 @@ ReoptimizeBlock: assert(PriorFBB == 0 && "Machine CFG out of date!"); PriorFBB = MBB; } + DebugLoc pdl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); } // Iterate through all the predecessors, revectoring each in-turn. 
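
Note: getBranchDebugLoc, added above, is a walk-backwards-skipping-debug-values scan over a block. The idea is sketched below with plain containers; Inst and its line-number field are stand-ins for MachineInstr and DebugLoc, not the real types:

#include <cstdio>
#include <vector>

struct Inst {
  bool IsDebugValue;
  bool IsBranch;
  int DebugLine;  // stand-in for a DebugLoc
};

static int getBranchDebugLine(const std::vector<Inst> &Block) {
  for (auto I = Block.rbegin(), E = Block.rend(); I != E; ++I) {
    if (I->IsDebugValue)
      continue;                              // debug-only entries carry no branch
    return I->IsBranch ? I->DebugLine : 0;   // 0 plays the role of DebugLoc()
  }
  return 0;                                  // empty (or all-debug) block
}

int main() {
  std::vector<Inst> Block = {
    {false, false, 10},  // ordinary instruction
    {false, true,  42},  // conditional branch with line 42
    {true,  false,  0},  // trailing debug value
  };
  std::printf("branch line: %d\n", getBranchDebugLine(Block));  // 42
  return 0;
}
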
@@ -1289,9 +1311,10 @@ ReoptimizeBlock: bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1351,7 +1374,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0362365..21729cd 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -80,7 +80,6 @@ add_llvm_library(LLVMCodeGen RegisterScavenging.cpp RenderMachineFunction.cpp ScheduleDAG.cpp - ScheduleDAGEmit.cpp ScheduleDAGInstrs.cpp ScheduleDAGPrinter.cpp ScoreboardHazardRecognizer.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index c684cdc..bad5010 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : RegClassInfo(RCI), Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)), KillIndices(TRI->getNumRegs(), 0), - DefIndices(TRI->getNumRegs(), 0) {} + DefIndices(TRI->getNumRegs(), 0), + KeepRegs(TRI->getNumRegs(), false) {} CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { } @@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { } // Clear "do not change" set. - KeepRegs.clear(); + KeepRegs.reset(); - bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); // Determine the live-out physregs for this block. if (IsReturnBlock) { @@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = MRI.liveout_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. 
for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -106,14 +107,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[Reg] = BB->size(); + KillIndices[Reg] = BBSize; DefIndices[Reg] = ~0u; // Repeat, for all aliases. for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); - KillIndices[AliasReg] = BB->size(); + KillIndices[AliasReg] = BBSize; DefIndices[AliasReg] = ~0u; } } @@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void CriticalAntiDepBreaker::FinishBlock() { RegRefs.clear(); - KeepRegs.clear(); + KeepRegs.reset(); } void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { RegRefs.insert(std::make_pair(Reg, &MO)); if (MO.isUse() && Special) { - if (KeepRegs.insert(Reg)) { + if (!KeepRegs.test(Reg)) { + KeepRegs.set(Reg); for (const uint16_t *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) - KeepRegs.insert(*Subreg); + KeepRegs.set(*Subreg); } } } @@ -259,7 +261,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (MO.clobbersPhysReg(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; - KeepRegs.erase(i); + KeepRegs.reset(i); Classes[i] = 0; RegRefs.erase(i); } @@ -276,7 +278,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, assert(((KillIndices[Reg] == ~0u) != (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); + KeepRegs.reset(Reg); Classes[Reg] = 0; RegRefs.erase(Reg); // Repeat, for all subregs. @@ -285,7 +287,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned SubregReg = *Subreg; DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); + KeepRegs.reset(SubregReg); Classes[SubregReg] = 0; RegRefs.erase(SubregReg); } @@ -551,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, if (!RegClassInfo.isAllocatable(AntiDepReg)) // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; - else if (KeepRegs.count(AntiDepReg)) + else if (KeepRegs.test(AntiDepReg)) // Don't break anti-dependencies if an use down below requires // this exact register. AntiDepReg = 0; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 0710780..7746259 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include <map> namespace llvm { @@ -66,7 +65,7 @@ class TargetRegisterInfo; /// KeepRegs - A set of registers which are live and cannot be changed to /// break anti-dependencies. 
- SmallSet<unsigned, 4> KeepRegs; + BitVector KeepRegs; public: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index f0cf290..5ff641c 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,10 +23,10 @@ // //===----------------------------------------------------------------------===// -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/MC/MCInstrItineraries.h" using namespace llvm; @@ -103,15 +103,12 @@ void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { namespace { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. -// -// ScheduleDAGInstrs has LLVM_LIBRARY_VISIBILITY so we have to reference it as -// an opaque pointer in VLIWPacketizerList. class DefaultVLIWScheduler : public ScheduleDAGInstrs { public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA); // Schedule - Actual scheduling work. - void Schedule(); + void schedule(); }; } // end anonymous namespace @@ -121,9 +118,9 @@ DefaultVLIWScheduler::DefaultVLIWScheduler( ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) { } -void DefaultVLIWScheduler::Schedule() { +void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. - BuildSchedGraph(0); + buildSchedGraph(0); } // VLIWPacketizerList Ctor @@ -137,7 +134,7 @@ VLIWPacketizerList::VLIWPacketizerList( // VLIWPacketizerList Dtor VLIWPacketizerList::~VLIWPacketizerList() { - delete (DefaultVLIWScheduler *)SchedulerImpl; + delete SchedulerImpl; delete ResourceTracker; } @@ -184,18 +181,15 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator BeginItr, MachineBasicBlock::iterator EndItr) { - DefaultVLIWScheduler *Scheduler = (DefaultVLIWScheduler *)SchedulerImpl; - Scheduler->Run(MBB, BeginItr, EndItr, MBB->size()); + assert(MBB->end() == EndItr && "Bad EndIndex"); - // Remember scheduling units. - SUnits = Scheduler->SUnits; + SchedulerImpl->enterRegion(MBB, BeginItr, EndItr, MBB->size()); - // Generate MI -> SU map. - std::map <MachineInstr*, SUnit*> MIToSUnit; - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - SUnit *SU = &SUnits[i]; - MIToSUnit[SU->getInstr()] = SU; - } + // Build the DAG without reordering instructions. + SchedulerImpl->schedule(); + + // Remember scheduling units. + SUnits = SchedulerImpl->SUnits; // The main packetizer loop. for (; BeginItr != EndItr; ++BeginItr) { @@ -211,7 +205,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, continue; } - SUnit *SUI = MIToSUnit[MI]; + SUnit *SUI = SchedulerImpl->getSUnit(MI); assert(SUI && "Missing SUnit Info!"); // Ask DFA if machine resource is available for MI. @@ -221,7 +215,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); VI != VE; ++VI) { MachineInstr *MJ = *VI; - SUnit *SUJ = MIToSUnit[MJ]; + SUnit *SUJ = SchedulerImpl->getSUnit(MJ); assert(SUJ && "Missing SUnit Info!"); // Is it legal to packetize SUI and SUJ together. 
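
Aside: the PacketizeMIs rewrite above drops the locally built MI-to-SUnit map and instead drives the scheduler through an enter-region / build / query / exit-region sequence. A rough sketch of that lifecycle with a stand-in scheduler class; none of these names are the real ScheduleDAGInstrs interface:

#include <cstdio>
#include <unordered_map>
#include <vector>

struct Node { int Instr; };

class Scheduler {
  std::unordered_map<int, Node *> InstrToNode;
public:
  std::vector<Node> Nodes;
  void enterRegion(const std::vector<int> &Instrs) {
    Nodes.clear();
    InstrToNode.clear();
    Nodes.reserve(Instrs.size());            // keep node pointers stable
    for (int I : Instrs)
      Nodes.push_back(Node{I});
    for (Node &N : Nodes)
      InstrToNode[N.Instr] = &N;             // the scheduler owns the lookup now
  }
  void buildGraph() { /* dependence edges would be added here */ }
  Node *getNode(int Instr) const {
    auto It = InstrToNode.find(Instr);
    return It == InstrToNode.end() ? nullptr : It->second;
  }
  void exitRegion() { /* per-region state would be torn down here */ }
};

int main() {
  Scheduler S;
  std::vector<int> Block = {1, 2, 3};
  S.enterRegion(Block);
  S.buildGraph();
  for (int I : Block)
    std::printf("instr %d -> node %p\n", I, (void *)S.getNode(I));
  S.exitRegion();
  return 0;
}
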
@@ -245,4 +239,6 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, // End any packet left behind. endPacket(MBB, EndItr); + + SchedulerImpl->exitRegion(); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 17633e2..97e6547 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -90,7 +90,7 @@ static void addPassesToHandleExceptions(TargetMachine *TM, // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. - PM.add(createSjLjEHPass(TM->getTargetLowering())); + PM.add(createSjLjEHPreparePass(TM->getTargetLowering())); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 0578229..deab05a 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) { } -// ScheduledNode - As nodes are scheduled, we look to see if there are any +// scheduledNode - As nodes are scheduled, we look to see if there are any // successor nodes that have a single unscheduled predecessor. If so, that // single predecessor has a higher priority, since scheduling it will make // the node available. -void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { AdjustPriorityOfUnscheduledPreds(I->getSUnit()); diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index c35302a..2187833 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -226,7 +226,7 @@ public: LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS); + UserValueScopes &UVS); /// addDefsFromCopies - The value in LI/LocNo may be copies to other /// registers. Determine if any of the copies are available at the kill @@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<SlotIndex, 16> Todo; Todo.push_back(Idx); do { @@ -620,7 +620,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveIntervals &LIS, MachineDominatorTree &MDT, - UserValueScopes &UVS) { + UserValueScopes &UVS) { SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -841,7 +841,7 @@ bool UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can - // safely erase unuused locations. + // safely erase unused locations. 
for (unsigned i = locations.size(); i ; --i) { unsigned LocNo = i-1; const MachineOperand *Loc = &locations[LocNo]; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 70ed1c3..3ade660 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1049,9 +1049,19 @@ public: bool hasRegMaskOp = false; collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx); - moveAllEnteringFrom(OldIdx, Entering); - moveAllInternalFrom(OldIdx, Internal); - moveAllExitingFrom(OldIdx, Exiting); + // To keep the LiveRanges valid within an interval, move the ranges closest + // to the destination first. This prevents ranges from overlapping, to that + // APIs like removeRange still work. + if (NewIdx < OldIdx) { + moveAllEnteringFrom(OldIdx, Entering); + moveAllInternalFrom(OldIdx, Internal); + moveAllExitingFrom(OldIdx, Exiting); + } + else { + moveAllExitingFrom(OldIdx, Exiting); + moveAllInternalFrom(OldIdx, Internal); + moveAllEnteringFrom(OldIdx, Entering); + } if (hasRegMaskOp) updateRegMaskSlots(OldIdx); @@ -1319,8 +1329,14 @@ private: void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) { LiveInterval* LI = P.first; LiveRange* LR = P.second; + // Extend the LiveRange if NewIdx is past the end. if (NewIdx > LR->end) { - moveKillFlags(LI->reg, LR->end, NewIdx); + // Move kill flags if OldIdx was not originally the end + // (otherwise LR->end points to an invalid slot). + if (LR->end.getRegSlot() != OldIdx.getRegSlot()) { + assert(LR->end > OldIdx && "LiveRange does not cover original slot"); + moveKillFlags(LI->reg, LR->end, NewIdx); + } LR->end = NewIdx.getRegSlot(); } } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 9c3d255..48e1e4c 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -109,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 611b045..ca8a8e8 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -238,6 +238,18 @@ StringRef MachineBasicBlock::getName() const { return "(null)"; } +/// Return a hopefully unique identifier for this block. +std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getFunction()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += (Twine("BB") + Twine(getNumber())).str(); + return Name; +} + void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { const MachineFunction *MF = getParent(); if (!MF) { diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e9f9475..43af1ad 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -481,7 +482,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MCID NULL and no operands. 
MachineInstr::MachineInstr() : MCID(0), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), + NumMemRefs(0), MemRefs(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -489,10 +490,10 @@ MachineInstr::MachineInstr() void MachineInstr::addImplicitDefUseOperands() { if (MCID->ImplicitDefs) - for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs) + for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses) + for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -501,7 +502,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// the MCInstrDesc. MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -516,7 +517,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp) MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { unsigned NumImplicitOps = 0; if (!NoImp) NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -532,7 +533,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, /// basic block. MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0) { + NumMemRefs(0), MemRefs(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -548,7 +549,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid) MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const MCInstrDesc &tid) : MCID(&tid), Flags(0), AsmPrinterFlags(0), - MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); unsigned NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); @@ -563,7 +564,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), - MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -738,28 +739,23 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - mmo_iterator OldMemRefsEnd = MemRefsEnd; + uint16_t OldNumMemRefs = NumMemRefs; - size_t NewNum = (MemRefsEnd - MemRefs) + 1; + uint16_t NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); - mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; - std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, 
NewMemRefs); NewMemRefs[NewNum - 1] = MO; MemRefs = NewMemRefs; - MemRefsEnd = NewMemRefsEnd; + NumMemRefs = NewNum; } -bool -MachineInstr::hasProperty(unsigned MCFlag, QueryType Type) const { - if (Type == IgnoreBundle || !isBundle()) - return getDesc().getFlags() & (1 << MCFlag); - +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { const MachineBasicBlock *MBB = getParent(); MachineBasicBlock::const_instr_iterator MII = *this; ++MII; while (MII != MBB->end() && MII->isInsideBundle()) { - if (MII->getDesc().getFlags() & (1 << MCFlag)) { + if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; } else { @@ -1843,49 +1839,55 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { - unsigned Hash = MI->getOpcode() * 37; + // Build up a buffer of hash code components. + // + // FIXME: This is a total hack. We should have a hash_value overload for + // MachineOperand, but currently that doesn't work because there are many + // different ideas of "equality" and thus different sets of information that + // contribute to the hash code. This one happens to want to take a specific + // subset. And it's still not clear that this routine uses the *correct* + // subset of information when computing the hash code. The goal is to use the + // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to + // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would + // be very useful to factor the selection of relevant inputs out of the two + // functions and into a common routine, but it's not clear how that can be + // done. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - uint64_t Key = (uint64_t)MO.getType() << 32; switch (MO.getType()) { default: break; case MachineOperand::MO_Register: if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; // Skip virtual register defs. 
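
The addMemOperand change above replaces the old (MemRefs, MemRefsEnd) iterator pair with a pointer plus a 16-bit NumMemRefs count, growing the array by allocating a fresh one on each append. A minimal standalone sketch of that pointer-plus-count append pattern follows; it uses plain new[] where LLVM uses the MachineFunction allocator, so the ownership handling here is purely illustrative.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    struct MemRef { int id; };                 // stand-in for MachineMemOperand

    struct Instr {
      MemRef **MemRefs = nullptr;              // array of operand pointers
      uint16_t NumMemRefs = 0;                 // count replaces the old end pointer

      void addMemOperand(MemRef *MO) {
        uint16_t NewNum = NumMemRefs + 1;
        MemRef **NewArray = new MemRef *[NewNum];          // new, larger array
        std::copy(MemRefs, MemRefs + NumMemRefs, NewArray);
        NewArray[NewNum - 1] = MO;                         // append the new operand
        delete[] MemRefs;                                  // LLVM's pool allocator skips this
        MemRefs = NewArray;
        NumMemRefs = NewNum;
      }
      ~Instr() { delete[] MemRefs; }
    };

    int main() {
      MemRef A{1}, B{2};
      Instr MI;
      MI.addMemOperand(&A);
      MI.addMemOperand(&B);
      std::cout << "memrefs: " << MI.NumMemRefs << "\n";   // prints 2
    }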
- Key |= MO.getReg(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getReg())); break; case MachineOperand::MO_Immediate: - Key |= MO.getImm(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getImm())); break; case MachineOperand::MO_FrameIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_JumpTableIndex: - Key |= MO.getIndex(); + HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex())); break; case MachineOperand::MO_MachineBasicBlock: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB())); break; case MachineOperand::MO_GlobalAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal())); break; case MachineOperand::MO_BlockAddress: - Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + HashComponents.push_back(hash_combine(MO.getType(), + MO.getBlockAddress())); break; case MachineOperand::MO_MCSymbol: - Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol()); + HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol())); break; } - Key += ~(Key << 32); - Key ^= (Key >> 22); - Key += ~(Key << 13); - Key ^= (Key >> 8); - Key += (Key << 3); - Key ^= (Key >> 15); - Key += ~(Key << 27); - Key ^= (Key >> 31); - Hash = (unsigned)Key + Hash * 37; - } - return Hash; + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); } void MachineInstr::emitError(StringRef Msg) const { diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index d1f2df9..73489a7 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -229,6 +229,8 @@ bool llvm::finalizeBundles(MachineFunction &MF) { "First instr cannot be inside bundle before finalization!"); MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; for (++MII; MII != MIE; ) { if (!MII->isInsideBundle()) ++MII; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 7d40e66..f140dec 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -161,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { /// form, so there should only be one definition. MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { // Since we are in SSA form, we can use the first definition. - if (!def_empty(Reg)) - return &*def_begin(Reg); - return 0; + def_iterator I = def_begin(Reg); + return !I.atEnd() ? 
&*I : 0; } bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 8a485e0..364a244 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "misched" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachinePassRegistry.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" @@ -25,25 +25,36 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" #include <queue> using namespace llvm; +static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// namespace { /// MachineScheduler runs after coalescing and before register allocation. -class MachineScheduler : public MachineFunctionPass { +class MachineScheduler : public MachineSchedContext, + public MachineFunctionPass { public: - MachineFunction *MF; - const TargetInstrInfo *TII; - const MachineLoopInfo *MLI; - const MachineDominatorTree *MDT; - LiveIntervals *LIS; - MachineScheduler(); virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -71,7 +82,7 @@ INITIALIZE_PASS_END(MachineScheduler, "misched", "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() -: MachineFunctionPass(ID), MF(0), MLI(0), MDT(0) { +: MachineFunctionPass(ID) { initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); } @@ -80,7 +91,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(MachineDominatorsID); AU.addRequired<MachineLoopInfo>(); AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<TargetPassConfig>(); AU.addRequired<SlotIndexes>(); AU.addPreserved<SlotIndexes>(); AU.addRequired<LiveIntervals>(); @@ -88,91 +99,226 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -namespace { -/// MachineSchedRegistry provides a selection of available machine instruction -/// schedulers. -class MachineSchedRegistry : public MachinePassRegistryNode { -public: - typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineScheduler *); +MachinePassRegistry MachineSchedRegistry::Registry; - // RegisterPassParser requires a (misnamed) FunctionPassCtor type. - typedef ScheduleDAGCtor FunctionPassCtor; +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. 
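
The new ForceTopDown/ForceBottomUp options and the NDEBUG-guarded ViewMISchedDAGs/MISchedCutoff options above follow a common pattern: in release builds the debug-only flag collapses to a constant so the code that tests it still compiles and folds away. A stripped-down sketch of that pattern without LLVM's cl::opt machinery; the names and the checkSchedLimit helper are illustrative only.

    #include <climits>
    #include <iostream>

    #ifndef NDEBUG
    // Debug builds: values can be changed, e.g. parsed from the command line.
    static bool ViewDAGs = false;
    static unsigned SchedCutoff = UINT_MAX;   // ~0U means "no cutoff"
    #else
    // Release builds: same names, but constants the optimizer can fold away.
    static const bool ViewDAGs = false;
    static const unsigned SchedCutoff = UINT_MAX;
    #endif

    static bool checkSchedLimit(unsigned &NumScheduled) {
      if (NumScheduled == SchedCutoff)        // dead code when the cutoff is ~0U
        return false;
      ++NumScheduled;
      return true;
    }

    int main() {
      unsigned N = 0;
      while (N < 5 && checkSchedLimit(N)) {}
      std::cout << "scheduled " << N << (ViewDAGs ? " (viewing)" : "") << "\n";
    }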
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return 0; +} - static MachinePassRegistry Registry; +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); - MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) - : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { - Registry.Add(this); - } - ~MachineSchedRegistry() { Registry.Remove(this); } +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. +bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + // Initialize the context of the pass. + MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AliasAnalysis>(); - // Accessors. - // - MachineSchedRegistry *getNext() const { - return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); - } - static MachineSchedRegistry *getList() { - return (MachineSchedRegistry *)Registry.getList(); - } - static ScheduleDAGCtor getDefault() { - return (ScheduleDAGCtor)Registry.getDefault(); - } - static void setDefault(ScheduleDAGCtor C) { - Registry.setDefault((MachinePassCtor)C); + LIS = &getAnalysis<LiveIntervals>(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor == useDefaultMachineSched) { + // Get the default scheduler set by the target. + Ctor = MachineSchedRegistry::getDefault(); + if (!Ctor) { + Ctor = createConvergingSched; + MachineSchedRegistry::setDefault(Ctor); + } } - static void setListener(MachinePassRegistryListener *L) { - Registry.setListener(L); + // Instantiate the selected scheduler. + OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); + + // Visit all machine basic blocks. 
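
The scheduler selection above uses a dummy factory, useDefaultMachineSched, as the command-line default: if the option still points at the dummy, the pass falls back to the target's registered default and finally to the built-in converging scheduler. A standalone sketch of that sentinel-factory pattern; the class and function names below are illustrative, not LLVM's.

    #include <iostream>
    #include <memory>

    struct Scheduler {
      virtual ~Scheduler() = default;
      virtual const char *name() const = 0;
    };
    struct Converging : Scheduler { const char *name() const override { return "converging"; } };
    struct Shuffler   : Scheduler { const char *name() const override { return "shuffle"; } };

    using Factory = Scheduler *(*)();

    // Sentinel meaning "no explicit choice was made on the command line".
    static Scheduler *useDefault() { return nullptr; }

    static Scheduler *createConverging() { return new Converging(); }
    static Scheduler *createShuffler()   { return new Shuffler(); }

    // Default a target could have registered; none is installed here.
    static Factory RegistryDefault = nullptr;

    std::unique_ptr<Scheduler> select(Factory FromCommandLine) {
      Factory Ctor = FromCommandLine;
      if (Ctor == useDefault) {            // option was left at the sentinel
        Ctor = RegistryDefault;            // fall back to the registered default
        if (!Ctor)
          Ctor = createConverging;         // finally, the built-in standard scheduler
      }
      return std::unique_ptr<Scheduler>(Ctor());
    }

    int main() {
      std::cout << select(useDefault)->name() << "\n";      // converging
      std::cout << select(createShuffler)->name() << "\n";  // shuffle
    }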
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + + Scheduler->startBlock(MBB); + + // Break the block into scheduling regions [I, RegionEnd), and schedule each + // region as soon as it is discovered. RegionEnd points the the scheduling + // boundary at the bottom of the region. The DAG does not include RegionEnd, + // but the region does (i.e. the next RegionEnd is above the previous + // RegionBegin). If the current block has no terminator then RegionEnd == + // MBB->end() for the bottom region. + // + // The Scheduler may insert instructions during either schedule() or + // exitRegion(), even for empty regions. So the local iterators 'I' and + // 'RegionEnd' are invalid across these calls. + unsigned RemainingCount = MBB->size(); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) { + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() + || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) { + --RegionEnd; + // Count the boundary instruction. + --RemainingCount; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingCount) { + if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) + break; + } + // Notify the scheduler of the region, even if we may skip scheduling + // it. Perhaps it still needs to be bundled. + Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (I == RegionEnd || I == llvm::prior(RegionEnd)) { + // Close the current region. Bundle the terminator if needed. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->exitRegion(); + continue; + } + DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() + << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " Remaining: " << RemainingCount << "\n"); + + // Schedule a region: possibly reorder instructions. + // This invalidates 'RegionEnd' and 'I'. + Scheduler->schedule(); + + // Close the current region. + Scheduler->exitRegion(); + + // Scheduling has invalidated the current iterator 'I'. Ask the + // scheduler for the top of it's scheduled region. + RegionEnd = Scheduler->begin(); + } + assert(RemainingCount == 0 && "Instruction count mismatch!"); + Scheduler->finishBlock(); } -}; -} // namespace + DEBUG(LIS->print(dbgs())); + return true; +} -MachinePassRegistry MachineSchedRegistry::Registry; +void MachineScheduler::print(raw_ostream &O, const Module* m) const { + // unimplemented +} -static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P); +//===----------------------------------------------------------------------===// +// MachineSchedStrategy - Interface to a machine scheduling algorithm. +//===----------------------------------------------------------------------===// -/// MachineSchedOpt allows command line selection of the scheduler. 
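
The loop above carves each block into scheduling regions bottom-up: RegionEnd starts at the block end, a boundary instruction (or the block end itself, for blocks without a terminator) closes a region, and the region body extends upward until the next boundary. The standalone sketch below reproduces that walk over a plain character sequence, with isBoundary standing in for TargetInstrInfo::isSchedulingBoundary and the LiveIntervals/RemainingCount bookkeeping omitted.

    #include <cstddef>
    #include <iostream>
    #include <string>

    // 'T' marks a scheduling boundary (terminator, label, ...), '.' is schedulable.
    static bool isBoundary(char c) { return c == 'T'; }

    int main() {
      std::string block = "..T...T.";       // one basic block; the bottom is the right end

      // Walk regions bottom-up: RegionEnd is one past the last instruction of the
      // region, and boundary instructions belong to no region.
      std::size_t RegionEnd = block.size();
      while (RegionEnd != 0) {
        // A boundary closes the region; skip over it, except at the very bottom
        // of a block whose last instruction is not a boundary.
        if (RegionEnd != block.size() || isBoundary(block[RegionEnd - 1]))
          --RegionEnd;
        // Extend the region upward until the next boundary.
        std::size_t I = RegionEnd;
        while (I != 0 && !isBoundary(block[I - 1]))
          --I;
        std::cout << "region [" << I << ", " << RegionEnd << ")\n";
        RegionEnd = I;                      // the next region ends where this one began
      }
      // Prints: region [7, 8), region [3, 6), region [0, 2)
    }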
-static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, - RegisterPassParser<MachineSchedRegistry> > -MachineSchedOpt("misched", - cl::init(&createDefaultMachineSched), cl::Hidden, - cl::desc("Machine instruction scheduler to use")); +namespace { +class ScheduleDAGMI; + +/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected +/// scheduling algorithm. +/// +/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it +/// in ScheduleDAGInstrs.h +class MachineSchedStrategy { +public: + virtual ~MachineSchedStrategy() {} + + /// Initialize the strategy after building the DAG for a new region. + virtual void initialize(ScheduleDAGMI *DAG) = 0; + + /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to + /// schedule the node at the top of the unscheduled region. Otherwise it will + /// be scheduled at the bottom. + virtual SUnit *pickNode(bool &IsTopNode) = 0; + + /// When all predecessor dependencies have been resolved, free this node for + /// top-down scheduling. + virtual void releaseTopNode(SUnit *SU) = 0; + /// When all successor dependencies have been resolved, free this node for + /// bottom-up scheduling. + virtual void releaseBottomNode(SUnit *SU) = 0; +}; +} // namespace //===----------------------------------------------------------------------===// -// Machine Instruction Scheduling Common Implementation +// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals +// preservation. //===----------------------------------------------------------------------===// namespace { -/// ScheduleTopDownLive is an implementation of ScheduleDAGInstrs that schedules +/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules /// machine instructions while updating LiveIntervals. -class ScheduleTopDownLive : public ScheduleDAGInstrs { -protected: - MachineScheduler *Pass; +class ScheduleDAGMI : public ScheduleDAGInstrs { + AliasAnalysis *AA; + MachineSchedStrategy *SchedImpl; + + /// The top of the unscheduled zone. + MachineBasicBlock::iterator CurrentTop; + + /// The bottom of the unscheduled zone. + MachineBasicBlock::iterator CurrentBottom; + + /// The number of instructions scheduled so far. Used to cut off the + /// scheduler at the point determined by misched-cutoff. + unsigned NumInstrsScheduled; public: - ScheduleTopDownLive(MachineScheduler *P): - ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS), - Pass(P) {} + ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S): + ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS), + AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(), + NumInstrsScheduled(0) {} - /// ScheduleDAGInstrs callback. - void Schedule(); + ~ScheduleDAGMI() { + delete SchedImpl; + } - /// Interface implemented by the selected top-down liveinterval scheduler. - /// - /// Pick the next node to schedule, or return NULL. - virtual SUnit *pickNode() = 0; + MachineBasicBlock::iterator top() const { return CurrentTop; } + MachineBasicBlock::iterator bottom() const { return CurrentBottom; } - /// When all preceeding dependencies have been resolved, free this node for - /// scheduling. - virtual void releaseNode(SUnit *SU) = 0; + /// Implement ScheduleDAGInstrs interface. 
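
MachineSchedStrategy above reduces a scheduling policy to a handful of hooks: initialize, pickNode with a top-or-bottom flag, releaseTopNode and releaseBottomNode, so ScheduleDAGMI can drive any policy the same way. A toy version of that separation is sketched here, with a trivial FIFO policy standing in for a real heuristic; the Node struct and runScheduler driver are invented for the example.

    #include <deque>
    #include <iostream>
    #include <vector>

    struct Node { int id; };

    // The strategy only decides which node goes next and from which end.
    struct SchedStrategy {
      virtual ~SchedStrategy() = default;
      virtual Node *pickNode(bool &IsTopNode) = 0;
      virtual void releaseTopNode(Node *N) = 0;
      virtual void releaseBottomNode(Node *N) = 0;
    };

    // Trivial policy: always schedule top-down, in release order.
    struct FifoStrategy : SchedStrategy {
      std::deque<Node *> Ready;
      Node *pickNode(bool &IsTopNode) override {
        if (Ready.empty()) return nullptr;
        IsTopNode = true;
        Node *N = Ready.front();
        Ready.pop_front();
        return N;
      }
      void releaseTopNode(Node *N) override { Ready.push_back(N); }
      void releaseBottomNode(Node *) override {}      // ignored by this policy
    };

    // The driver owns instruction movement; the strategy owns the choices.
    void runScheduler(std::vector<Node> &Nodes, SchedStrategy &S) {
      for (Node &N : Nodes)
        S.releaseTopNode(&N);                         // pretend every node is a DAG root
      bool IsTop = false;
      while (Node *N = S.pickNode(IsTop))
        std::cout << (IsTop ? "top " : "bot ") << N->id << "\n";
    }

    int main() {
      std::vector<Node> Nodes = {{0}, {1}, {2}};
      FifoStrategy S;
      runScheduler(Nodes, S);
    }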
+ void schedule(); protected: + void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); + bool checkSchedLimit(); + void releaseSucc(SUnit *SU, SDep *SuccEdge); void releaseSuccessors(SUnit *SU); + void releasePred(SUnit *SU, SDep *PredEdge); + void releasePredecessors(SUnit *SU); }; } // namespace /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. -void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) { +void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); #ifndef NDEBUG @@ -185,164 +331,199 @@ void ScheduleTopDownLive::releaseSucc(SUnit *SU, SDep *SuccEdge) { #endif --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) - releaseNode(SuccSU); + SchedImpl->releaseTopNode(SuccSU); } /// releaseSuccessors - Call releaseSucc on each of SU's successors. -void ScheduleTopDownLive::releaseSuccessors(SUnit *SU) { +void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { releaseSucc(SU, &*I); } } -/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's -/// time to do some work. -void ScheduleTopDownLive::Schedule() { - BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>()); +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +void ScheduleDAGMI::moveInstruction(MachineInstr *MI, + MachineBasicBlock::iterator InsertPos) { + // Fix RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + RegionBegin = llvm::next(RegionBegin); + BB->splice(InsertPos, BB, MI); + LIS->handleMove(MI); + // Fix RegionBegin if another instruction moves above the first instruction. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. +void ScheduleDAGMI::schedule() { + buildSchedGraph(AA); DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - // Release any successors of the special Entry node. It is currently unused, - // but we keep up appearances. + if (ViewMISchedDAGs) viewGraph(); + + SchedImpl->initialize(this); + + // Release edges from the special Entry node or to the special Exit node. releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); // Release all DAG roots for scheduling. 
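
releaseSucc and releasePred above implement the usual ready-count bookkeeping: scheduling a node decrements the remaining-predecessor (or remaining-successor) count of each neighbor, and a node is handed to the strategy the moment its count reaches zero. Below is a compact standalone version of the top-down half over a tiny dependence graph; it is plain Kahn-style topological release, not the LLVM data structures.

    #include <iostream>
    #include <queue>
    #include <vector>

    int main() {
      // Edges point from a node to the nodes that depend on it (its successors).
      std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
      std::vector<int> NumPredsLeft(Succs.size(), 0);
      for (const auto &S : Succs)
        for (int Dst : S)
          ++NumPredsLeft[Dst];

      // Release the DAG roots: nodes with no unscheduled predecessors.
      std::queue<int> Ready;
      for (int N = 0; N != (int)Succs.size(); ++N)
        if (NumPredsLeft[N] == 0)
          Ready.push(N);

      // Scheduling a node releases any successor whose last predecessor it was.
      while (!Ready.empty()) {
        int N = Ready.front();
        Ready.pop();
        std::cout << "schedule " << N << "\n";
        for (int Succ : Succs[N])
          if (--NumPredsLeft[Succ] == 0)
            Ready.push(Succ);
      }
    }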
for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { - // A SUnit is ready to schedule if it has no predecessors. + // A SUnit is ready to top schedule if it has no predecessors. if (I->Preds.empty()) - releaseNode(&(*I)); + SchedImpl->releaseTopNode(&(*I)); + // A SUnit is ready to bottom schedule if it has no successors. + if (I->Succs.empty()) + SchedImpl->releaseBottomNode(&(*I)); } - InsertPos = Begin; - while (SUnit *SU = pickNode()) { - DEBUG(dbgs() << "*** Scheduling Instruction:\n"; SU->dump(this)); + CurrentTop = RegionBegin; + CurrentBottom = RegionEnd; + bool IsTopNode = false; + while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling Instruction:\n"; SU->dump(this)); + if (!checkSchedLimit()) + break; // Move the instruction to its new location in the instruction stream. MachineInstr *MI = SU->getInstr(); - if (&*InsertPos == MI) - ++InsertPos; + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + ++CurrentTop; + else + moveInstruction(MI, CurrentTop); + // Release dependent instructions for scheduling. + releaseSuccessors(SU); + } else { - BB->splice(InsertPos, BB, MI); - Pass->LIS->handleMove(MI); - if (Begin == InsertPos) - Begin = MI; + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + if (&*llvm::prior(CurrentBottom) == MI) + --CurrentBottom; + else { + if (&*CurrentTop == MI) + CurrentTop = llvm::next(CurrentTop); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + // Release dependent instructions for scheduling. + releasePredecessors(SU); } - - // Release dependent instructions for scheduling. - releaseSuccessors(SU); + SU->isScheduled = true; } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); } -bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { - // Initialize the context of the pass. - MF = &mf; - MLI = &getAnalysis<MachineLoopInfo>(); - MDT = &getAnalysis<MachineDominatorTree>(); - LIS = &getAnalysis<LiveIntervals>(); - TII = MF->getTarget().getInstrInfo(); +//===----------------------------------------------------------------------===// +// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +//===----------------------------------------------------------------------===// - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = - MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = MachineSchedOpt; - MachineSchedRegistry::setDefault(Ctor); - } - // Instantiate the selected scheduler. - OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this)); +namespace { +/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// the schedule. +class ConvergingScheduler : public MachineSchedStrategy { + ScheduleDAGMI *DAG; - // Visit all machine basic blocks. - for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); - MBB != MBBEnd; ++MBB) { + unsigned NumTopReady; + unsigned NumBottomReady; - // Break the block into scheduling regions [I, RegionEnd), and schedule each - // region as soon as it is discovered. - unsigned RemainingCount = MBB->size(); - for(MachineBasicBlock::iterator RegionEnd = MBB->end(); - RegionEnd != MBB->begin();) { - // The next region starts above the previous region. Look backward in the - // instruction stream until we find the nearest boundary. 
- MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) - break; - } - if (I == RegionEnd) { - // Skip empty scheduling regions. - RegionEnd = llvm::prior(RegionEnd); - --RemainingCount; - continue; - } - // Skip regions with one instruction. - if (I == llvm::prior(RegionEnd)) { - RegionEnd = llvm::prior(RegionEnd); - continue; - } - DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName() - << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; - if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; - else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingCount << "\n"); +public: + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; - // Inform ScheduleDAGInstrs of the region being scheduled. It calls back - // to our Schedule() method. - Scheduler->Run(MBB, I, RegionEnd, MBB->size()); - RegionEnd = Scheduler->Begin; - } - assert(RemainingCount == 0 && "Instruction count mismatch!"); + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); } - return true; -} -void MachineScheduler::print(raw_ostream &O, const Module* m) const { - // unimplemented -} + virtual SUnit *pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) + return NULL; -//===----------------------------------------------------------------------===// -// Placeholder for extending the machine instruction scheduler. -//===----------------------------------------------------------------------===// - -namespace { -class DefaultMachineScheduler : public ScheduleDAGInstrs { - MachineScheduler *Pass; -public: - DefaultMachineScheduler(MachineScheduler *P): - ScheduleDAGInstrs(*P->MF, *P->MLI, *P->MDT, /*IsPostRA=*/false, P->LIS), - Pass(P) {} + // As an initial placeholder heuristic, schedule in the direction that has + // the fewest choices. + SUnit *SU; + if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) { + SU = DAG->getSUnit(DAG->top()); + IsTopNode = true; + } + else { + SU = DAG->getSUnit(llvm::prior(DAG->bottom())); + IsTopNode = false; + } + if (SU->isTopReady()) { + assert(NumTopReady > 0 && "bad ready count"); + --NumTopReady; + } + if (SU->isBottomReady()) { + assert(NumBottomReady > 0 && "bad ready count"); + --NumBottomReady; + } + return SU; + } - /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's - /// time to do some work. - void Schedule(); + virtual void releaseTopNode(SUnit *SU) { + ++NumTopReady; + } + virtual void releaseBottomNode(SUnit *SU) { + ++NumBottomReady; + } }; } // namespace -static ScheduleDAGInstrs *createDefaultMachineSched(MachineScheduler *P) { - return new DefaultMachineScheduler(P); +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new ConvergingScheduler()); } static MachineSchedRegistry -SchedDefaultRegistry("default", "Activate the scheduler pass, " - "but don't reorder instructions", - createDefaultMachineSched); - - -/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's -/// time to do some work. 
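
ScheduleDAGMI::schedule above keeps two cursors, CurrentTop and CurrentBottom, and each picked node is moved to whichever end it was scheduled from until the cursors meet; the placeholder ConvergingScheduler picks the end that currently has fewer ready nodes. The sketch below shows only the converging-cursor idea on a plain array, with a simple alternate-ends rule standing in for the ready-count heuristic, so the tie-breaking here is illustrative rather than LLVM's.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main() {
      std::vector<int> Region = {10, 11, 12, 13, 14};   // the unscheduled zone

      std::size_t Top = 0, Bottom = Region.size();      // cursors converge toward each other
      bool FromTop = true;                              // alternate ends each step
      while (Top != Bottom) {
        if (FromTop) {
          std::cout << "top    schedules " << Region[Top] << "\n";
          ++Top;                                        // the node joins the scheduled top zone
        } else {
          --Bottom;                                     // the node joins the scheduled bottom zone
          std::cout << "bottom schedules " << Region[Bottom] << "\n";
        }
        FromTop = !FromTop;
      }
      // Top == Bottom: the unscheduled zone is empty, mirroring the final assert.
    }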
-void DefaultMachineScheduler::Schedule() { - BuildSchedGraph(&Pass->getAnalysis<AliasAnalysis>()); - - DEBUG(dbgs() << "********** MI Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); - - // TODO: Put interesting things here. - // - // When this is fully implemented, it will become a subclass of - // ScheduleTopDownLive. So this driver will disappear. -} +ConvergingSchedRegistry("converge", "Standard converging scheduler.", + createConvergingSched); //===----------------------------------------------------------------------===// // Machine Instruction Shuffler for Correctness Testing @@ -350,43 +531,83 @@ void DefaultMachineScheduler::Schedule() { #ifndef NDEBUG namespace { -// Nodes with a higher number have higher priority. This way we attempt to -// schedule the latest instructions earliest. -// -// TODO: Relies on the property of the BuildSchedGraph that results in SUnits -// being ordered in sequence top-down. -struct ShuffleSUnitOrder { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { bool operator()(SUnit *A, SUnit *B) const { - return A->NodeNum < B->NodeNum; + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; } }; /// Reorder instructions as much as possible. -class InstructionShuffler : public ScheduleTopDownLive { - std::priority_queue<SUnit*, std::vector<SUnit*>, ShuffleSUnitOrder> Queue; +class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; public: - InstructionShuffler(MachineScheduler *P): - ScheduleTopDownLive(P) {} + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} - /// ScheduleTopDownLive Interface + virtual void initialize(ScheduleDAGMI *) { + TopQ.clear(); + BottomQ.clear(); + } - virtual SUnit *pickNode() { - if (Queue.empty()) return NULL; - SUnit *SU = Queue.top(); - Queue.pop(); + /// Implement MachineSchedStrategy interface. 
+ /// ----------------------------------------- + + virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return NULL; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return NULL; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; return SU; } - virtual void releaseNode(SUnit *SU) { - Queue.push(SU); + virtual void releaseTopNode(SUnit *SU) { + TopQ.push(SU); + } + virtual void releaseBottomNode(SUnit *SU) { + BottomQ.push(SU); } }; } // namespace -static ScheduleDAGInstrs *createInstructionShuffler(MachineScheduler *P) { - return new InstructionShuffler(P); +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown)); } -static MachineSchedRegistry ShufflerRegistry("shuffle", - "Shuffle machine instructions", - createInstructionShuffler); +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); #endif // !NDEBUG diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 394a960..830a876 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -900,7 +900,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -937,7 +937,7 @@ void MachineVerifier::calcRegsPassed() { // similar to calcRegsPassed, only backwards. void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. - DenseSet<const MachineBasicBlock*> todo; + SmallPtrSet<const MachineBasicBlock*, 8> todo; for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { const MachineBasicBlock &MBB(*MFI); @@ -970,9 +970,10 @@ void MachineVerifier::calcRegsRequired() { // Check PHI instructions at the beginning of MBB. It is assumed that // calcRegsPassed has been run so BBInfo::isLiveOut is valid. 
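
The InstructionShuffler above keeps two priority queues over the original node order, using SUnitOrder<false> for the top queue (so the latest node in program order pops first, shuffling as hard as possible) and SUnitOrder<true> for the bottom queue. A self-contained version of that templated comparator with std::priority_queue, using bare node numbers instead of SUnit pointers:

    #include <iostream>
    #include <queue>
    #include <vector>

    // Less-than on the original node order; IsReverse flips it to greater-than.
    template <bool IsReverse> struct NodeOrder {
      bool operator()(unsigned A, unsigned B) const {
        return IsReverse ? A > B : A < B;
      }
    };

    int main() {
      // Top-down queue: max-heap on node number, so the latest node pops first.
      std::priority_queue<unsigned, std::vector<unsigned>, NodeOrder<false>> TopQ;
      // Bottom-up queue: reversed comparison, so the earliest node pops first.
      std::priority_queue<unsigned, std::vector<unsigned>, NodeOrder<true>> BottomQ;

      for (unsigned N : {0u, 1u, 2u, 3u}) {
        TopQ.push(N);
        BottomQ.push(N);
      }
      std::cout << "top picks " << TopQ.top()                        // 3
                << ", bottom picks " << BottomQ.top() << "\n";       // 0
    }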
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); BBI != BBE && BBI->isPHI(); ++BBI) { - DenseSet<const MachineBasicBlock*> seen; + seen.clear(); for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { unsigned Reg = BBI->getOperand(i).getReg(); @@ -1013,8 +1014,17 @@ void MachineVerifier::visitMachineFunctionAfter() { } // Now check liveness info if available - if (LiveVars || LiveInts) - calcRegsRequired(); + calcRegsRequired(); + + if (MRI->isSSA() && !MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + if (LiveVars) verifyLiveVariables(); if (LiveInts) diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index ec1f2b4..6246c21 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -564,7 +564,8 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(RegisterCoalescerID); // PreRA instruction scheduling. - addPass(MachineSchedulerID); + if (addPass(MachineSchedulerID) != &NoPassID) + printAndVerify("After Machine Scheduling"); // Add the selected register allocation pass. PM.add(RegAllocPass); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index e59aa9d..24d3e5a 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -23,7 +23,6 @@ #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" #include "RegisterClassInfo.h" -#include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetLowering.h" @@ -127,6 +127,9 @@ namespace { /// LiveRegs - true if the register is live. BitVector LiveRegs; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -136,23 +139,34 @@ namespace { ~SchedulePostRATDList(); - /// StartBlock - Initialize register live-range state for scheduling in + /// startBlock - Initialize register live-range state for scheduling in /// this block. /// - void StartBlock(MachineBasicBlock *BB); + void startBlock(MachineBasicBlock *BB); + + /// Initialize the scheduler state for the next scheduling region. + virtual void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount); + + /// Notify that the scheduler has finished scheduling the current region. + virtual void exitRegion(); /// Schedule - Schedule the instruction range using list scheduling. /// - void Schedule(); + void schedule(); + + void EmitSchedule(); /// Observe - Update liveness information to account for the current /// instruction, which will not be scheduled. /// void Observe(MachineInstr *MI, unsigned Count); - /// FinishBlock - Clean up register live-range state. 
+ /// finishBlock - Clean up register live-range state. /// - void FinishBlock(); + void finishBlock(); /// FixupKills - Fix register kill flags that have been made /// invalid due to scheduling @@ -170,6 +184,8 @@ namespace { // adjustments may be made to the instruction if necessary. Return // true if the operand has been deleted, false if not. bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + void dumpSchedule() const; }; } @@ -202,6 +218,35 @@ SchedulePostRATDList::~SchedulePostRATDList() { delete AntiDepBreak; } +/// Initialize state associated with the next scheduling region. +void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getTarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); @@ -256,7 +301,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { #endif // Initialize register live-range state for scheduling in this block. - Scheduler.StartBlock(MBB); + Scheduler.startBlock(MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. @@ -268,7 +313,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.Run(MBB, I, Current, CurrentCount); + Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; CurrentCount = Count - 1; @@ -282,11 +329,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert(Count == 0 && "Instruction count mismatch!"); assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); Scheduler.EmitSchedule(); // Clean up register live-range state. - Scheduler.FinishBlock(); + Scheduler.finishBlock(); // Update register kills Scheduler.FixupKills(MBB); @@ -298,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { /// StartBlock - Initialize register live-range state for scheduling in /// this block. /// -void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Call the superclass. - ScheduleDAGInstrs::StartBlock(BB); + ScheduleDAGInstrs::startBlock(BB); // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); @@ -310,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) { /// Schedule - Schedule the instruction range using list scheduling. 
/// -void SchedulePostRATDList::Schedule() { +void SchedulePostRATDList::schedule() { // Build the scheduling graph. - BuildSchedGraph(AA); + buildSchedGraph(AA); if (AntiDepBreak != NULL) { unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, - InsertPosIndex, DbgValues); + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); if (Broken != 0) { // We made changes. Update the dependency graph. @@ -326,11 +375,8 @@ void SchedulePostRATDList::Schedule() { // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); NumFixedAnti += Broken; } @@ -350,17 +396,17 @@ void SchedulePostRATDList::Schedule() { /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { if (AntiDepBreak != NULL) - AntiDepBreak->Observe(MI, Count, InsertPosIndex); + AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// -void SchedulePostRATDList::FinishBlock() { +void SchedulePostRATDList::finishBlock() { if (AntiDepBreak != NULL) AntiDepBreak->FinishBlock(); // Call the superclass. - ScheduleDAGInstrs::FinishBlock(); + ScheduleDAGInstrs::finishBlock(); } /// StartBlockForKills - Initialize register live-range state for updating kills @@ -589,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { ReleaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue.ScheduledNode(SU); + AvailableQueue.scheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down @@ -703,6 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG +} + +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = prior(RegionEnd); + } + + // Reinsert any remaining debug_values. 
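
EmitSchedule now lives in the post-RA scheduler itself: the Sequence vector is replayed in order, a null SUnit* means "emit a noop", and the verification added to ListScheduleTopDown checks that the non-noop slots match the number of scheduled nodes. A small standalone replay of that convention over strings; the splice and LiveIntervals details are LLVM-specific and omitted.

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // The computed schedule; a null entry stands for a noop the target must emit.
      std::string I0 = "ld", I1 = "add", I2 = "st";
      std::vector<const std::string *> Sequence = {&I1, nullptr, &I0, &I2};

      unsigned Noops = 0;
      for (const std::string *SU : Sequence) {
        if (SU) {
          std::cout << *SU << "\n";          // splice the real instruction into place
        } else {
          std::cout << "nop\n";              // null SUnit* == noop, as in dumpSchedule
          ++Noops;
        }
      }
      // Mirror the new verification: non-noop slots equal the scheduled nodes.
      std::cout << Sequence.size() - Noops << " real instructions\n";   // 3
    }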
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = NULL; } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c27a485..e09b7f8 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -1139,7 +1139,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // Add the clobber lists for all the instructions we skipped earlier. for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) - if (const unsigned *Defs = (*I)->getImplicitDefs()) + if (const uint16_t *Defs = (*I)->getImplicitDefs()) while (*Defs) MRI->setPhysRegUsed(*Defs++); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index ef0c508..310b933 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -47,7 +47,7 @@ namespace llvm { /// CrossClass - True when both regs are virtual, and newRC is constrained. bool CrossClass; - /// Flipped - True when DstReg and SrcReg are reversed from the oriignal + /// Flipped - True when DstReg and SrcReg are reversed from the original /// copy instruction. bool Flipped; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 94b28b6..8fd6426 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -46,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} -/// getInstrDesc helper to handle SDNodes. -const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; - return &TII->get(Node->getMachineOpcode()); -} - -/// dump - dump the schedule. -void ScheduleDAG::dumpSchedule() const { - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - SU->dump(this); - else - dbgs() << "**** NOOP ****\n"; - } -} - - -/// Run - perform scheduling. -/// -void ScheduleDAG::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { - BB = bb; - InsertPos = insertPos; - +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { SUnits.clear(); - Sequence.clear(); EntrySU = SUnit(); ExitSU = SUnit(); +} - Schedule(); - - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); +/// getInstrDesc helper to handle SDNodes. +const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); } /// addPred - This adds the specified edge as a pred of the current node if @@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { } #ifndef NDEBUG -/// VerifySchedule - Verify that all SUnits were scheduled and that -/// their state is consistent. +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. 
/// -void ScheduleDAG::VerifySchedule(bool isBottomUp) { +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { bool AnyNotSched = false; unsigned DeadNodes = 0; - unsigned Noops = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { if (!SUnits[i].isScheduled) { if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { @@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } } } - for (unsigned i = 0, e = Sequence.size(); i != e; ++i) - if (!Sequence[i]) - ++Noops; assert(!AnyNotSched); - assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && - "The number of nodes scheduled doesn't match the expected number!"); + return SUnits.size() - DeadNodes; } #endif diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp deleted file mode 100644 index f8b1bc7..0000000 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements the Emit routines for the ScheduleDAG class, which creates -// MachineInstrs according to the computed schedule. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -void ScheduleDAG::EmitNoop() { - TII->insertNoop(*BB, InsertPos); -} - -void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, - DenseMap<SUnit*, unsigned> &VRBaseMap) { - for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - if (I->getSUnit()->CopyDstRC) { - // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); - assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); - // Find the destination physical register. - unsigned Reg = 0; - for (SUnit::const_succ_iterator II = SU->Succs.begin(), - EE = SU->Succs.end(); II != EE; ++II) { - if (II->isCtrl()) continue; // ignore chain preds - if (II->getReg()) { - Reg = II->getReg(); - break; - } - } - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(VRI->second); - } else { - // Copy from physical register. - assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); - bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; - (void)isNew; // Silence compiler warning. 
- assert(isNew && "Node emitted out of order - early"); - BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) - .addReg(I->getReg()); - } - break; - } -} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index c0ccdb3..6be1ab7 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sched-instrs" -#include "ScheduleDAGInstrs.h" #include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -38,30 +38,15 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, bool IsPostRAFlag, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), - InstrItins(mf.getTarget().getInstrItineraryData()), IsPostRA(IsPostRAFlag), - LIS(lis), UnitLatencies(false), LoopRegs(MLI, MDT), FirstDbgValue(0) { + InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), + IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), + FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && "Virtual registers must be removed prior to PostRA scheduling"); } -/// Run - perform scheduling. -/// -void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endcount) { - BB = bb; - Begin = begin; - InsertPosIndex = endcount; - - // Check to see if the scheduler cares about latencies. - UnitLatencies = ForceUnitLatencies(); - - ScheduleDAG::Run(bb, end); -} - /// getUnderlyingObjectFromInt - This is the function that does the work of /// looking through basic ptrtoint+arithmetic+inttoptr sequences. static const Value *getUnderlyingObjectFromInt(const Value *V) { @@ -141,28 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, return 0; } -void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) { LoopRegs.Deps.clear(); if (MachineLoop *ML = MLI.getLoopFor(BB)) if (BB == ML->getLoopLatch()) LoopRegs.VisitLoop(ML); } +void ScheduleDAGInstrs::finishBlock() { + // Nothing to do. +} + /// Initialize the map with the number of registers. -void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) { +void Reg2SUnitsMap::setRegLimit(unsigned Limit) { PhysRegSet.setUniverse(Limit); SUnits.resize(Limit); } /// Clear the map without deallocating storage. -void ScheduleDAGInstrs::Reg2SUnitsMap::clear() { +void Reg2SUnitsMap::clear() { for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { SUnits[*I].clear(); } PhysRegSet.clear(); } -/// AddSchedBarrierDeps - Add dependencies from instructions in the current +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. This does not actually create the DAG, only clears it. The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. 
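
Reg2SUnitsMap above, now a standalone class, pairs a SparseSet of occupied register numbers with a vector sized once to the register universe, so clear() only touches the registers that were actually used instead of deallocating everything between regions. A rough equivalent using a plain vector of keys in place of LLVM's SparseSet; that substitution, and the RegListMap name, are assumptions made only for illustration.

    #include <cassert>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Maps a physical register number to a list of users; sized to the register
    // universe once, then reused across scheduling regions.
    class RegListMap {
      std::vector<unsigned> Occupied;              // registers with a non-empty list
      std::vector<std::vector<int>> Lists;         // one slot per register in the universe

    public:
      void setRegLimit(unsigned Limit) { Lists.resize(Limit); }

      void add(unsigned Reg, int User) {
        assert(Reg < Lists.size() && "register outside the universe");
        if (Lists[Reg].empty())
          Occupied.push_back(Reg);                 // remember it for the next clear()
        Lists[Reg].push_back(User);
      }

      // Clear without deallocating: only visit the registers that were touched.
      void clear() {
        for (unsigned Reg : Occupied)
          Lists[Reg].clear();
        Occupied.clear();
      }

      std::size_t occupied() const { return Occupied.size(); }
    };

    int main() {
      RegListMap Uses;
      Uses.setRegLimit(256);
      Uses.add(3, 0);
      Uses.add(3, 1);
      Uses.add(17, 2);
      std::cout << "occupied: " << Uses.occupied() << "\n";   // 2
      Uses.clear();
      std::cout << "occupied: " << Uses.occupied() << "\n";   // 0
    }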
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + RegionBegin = begin; + RegionEnd = end; + EndIndex = endcount; + MISUnitMap.clear(); + + // Check to see if the scheduler cares about latencies. + UnitLatencies = forceUnitLatencies(); + + ScheduleDAG::clearDAG(); +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier by adding /// the exit SU to the register defs and use list. This is because we want to /// make sure instructions which define registers that are either used by @@ -170,8 +185,8 @@ void ScheduleDAGInstrs::Reg2SUnitsMap::clear() { /// especially important when the definition latency of the return value(s) /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. -void ScheduleDAGInstrs::AddSchedBarrierDeps() { - MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && (ExitMI->isCall() || ExitMI->isBarrier()); @@ -186,19 +201,21 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() { if (TRI->isPhysicalRegister(Reg)) Uses[Reg].push_back(&ExitSU); - else + else { assert(!IsPostRA && "Virtual register encountered after regalloc."); + addVRegUseDeps(&ExitSU, i); + } } } else { // For others, e.g. fallthrough, conditional branch, assume the exit // uses all the registers that are livein to the successor blocks. - SmallSet<unsigned, 8> Seen; + assert(Uses.empty() && "Uses in set before adding deps?"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - if (Seen.insert(Reg)) + if (!Uses.contains(Reg)) Uses[Reg].push_back(&ExitSU); } } @@ -246,7 +263,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, // perform its own adjustments. const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias); if (!UnitLatencies) { - ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); + computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep)); ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep)); } UseSU->addPred(dep); @@ -436,7 +453,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { if (!UnitLatencies) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. - ComputeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); + computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep)); const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); } @@ -455,20 +472,23 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { /// /// Map each real instruction to its SUnit. /// -/// After initSUnits, the SUnits vector is cannot be resized and the scheduler -/// may hang onto SUnit pointers. We may relax this in the future by using SUnit -/// IDs instead of pointers. 
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. +/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. SUnits.reserve(BB->size()); - for (MachineBasicBlock::iterator I = Begin; I != InsertPos; ++I) { + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; if (MI->isDebugValue()) continue; - SUnit *SU = NewSUnit(MI); + SUnit *SU = newSUnit(MI); MISUnitMap[MI] = SU; SU->isCall = MI->isCall(); @@ -478,11 +498,11 @@ void ScheduleDAGInstrs::initSUnits() { if (UnitLatencies) SU->Latency = 1; else - ComputeLatency(SU); + computeLatency(SU); } } -void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { // Create an SUnit for each real instruction. initSUnits(); @@ -517,11 +537,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // Model data dependencies between instructions being scheduled and the // ExitSU. - AddSchedBarrierDeps(); + addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. MachineInstr *PrevMI = NULL; - for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); if (MI && PrevMI) { @@ -712,14 +732,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { Uses.clear(); VRegDefs.clear(); PendingLoads.clear(); - MISUnitMap.clear(); -} - -void ScheduleDAGInstrs::FinishBlock() { - // Nothing to do. } -void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { +void ScheduleDAGInstrs::computeLatency(SUnit *SU) { // Compute the latency for the node. if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; @@ -733,7 +748,7 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { } } -void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, +void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { if (!InstrItins || InstrItins->isEmpty()) return; @@ -808,37 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { return oss.str(); } -// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { - Begin = InsertPos; - - // If first instruction was a DBG_VALUE then put it back. - if (FirstDbgValue) - BB->splice(InsertPos, BB, FirstDbgValue); - - // Then re-insert them according to the given schedule. - for (unsigned i = 0, e = Sequence.size(); i != e; i++) { - if (SUnit *SU = Sequence[i]) - BB->splice(InsertPos, BB, SU->getInstr()); - else - // Null SUnit* is a noop. - EmitNoop(); - - // Update the Begin iterator, as the first instruction in the block - // may have been scheduled later. - if (i == 0) - Begin = prior(InsertPos); - } - - // Reinsert any remaining debug_values. 
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator - DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { - std::pair<MachineInstr *, MachineInstr *> P = *prior(DI); - MachineInstr *DbgValue = P.first; - MachineBasicBlock::iterator OrigPrivMI = P.second; - BB->splice(++OrigPrivMI, BB, DbgValue); - } - DbgValues.clear(); - FirstDbgValue = NULL; - return BB; +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. +std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h deleted file mode 100644 index c7ffed9..0000000 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ /dev/null @@ -1,306 +0,0 @@ -//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ScheduleDAGInstrs class, which implements -// scheduling for a MachineInstr-based dependency graph. -// -//===----------------------------------------------------------------------===// - -#ifndef SCHEDULEDAGINSTRS_H -#define SCHEDULEDAGINSTRS_H - -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SparseSet.h" -#include <map> - -namespace llvm { - class MachineLoopInfo; - class MachineDominatorTree; - class LiveIntervals; - - /// LoopDependencies - This class analyzes loop-oriented register - /// dependencies, which are used to guide scheduling decisions. - /// For example, loop induction variable increments should be - /// scheduled as soon as possible after the variable's last use. - /// - class LLVM_LIBRARY_VISIBILITY LoopDependencies { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - - public: - typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> > - LoopDeps; - LoopDeps Deps; - - LoopDependencies(const MachineLoopInfo &mli, - const MachineDominatorTree &mdt) : - MLI(mli), MDT(mdt) {} - - /// VisitLoop - Clear out any previous state and analyze the given loop. 
- /// - void VisitLoop(const MachineLoop *Loop) { - assert(Deps.empty() && "stale loop dependencies"); - - MachineBasicBlock *Header = Loop->getHeader(); - SmallSet<unsigned, 8> LoopLiveIns; - for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), - LE = Header->livein_end(); LI != LE; ++LI) - LoopLiveIns.insert(*LI); - - const MachineDomTreeNode *Node = MDT.getNode(Header); - const MachineBasicBlock *MBB = Node->getBlock(); - assert(Loop->contains(MBB) && - "Loop does not contain header!"); - VisitRegion(Node, MBB, Loop, LoopLiveIns); - } - - private: - void VisitRegion(const MachineDomTreeNode *Node, - const MachineBasicBlock *MBB, - const MachineLoop *Loop, - const SmallSet<unsigned, 8> &LoopLiveIns) { - unsigned Count = 0; - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isDebugValue()) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned MOReg = MO.getReg(); - if (LoopLiveIns.count(MOReg)) - Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); - } - ++Count; // Not every iteration due to dbg_value above. - } - - const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); - for (std::vector<MachineDomTreeNode*>::const_iterator I = - Children.begin(), E = Children.end(); I != E; ++I) { - const MachineDomTreeNode *ChildNode = *I; - MachineBasicBlock *ChildBlock = ChildNode->getBlock(); - if (Loop->contains(ChildBlock)) - VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); - } - } - }; - - /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of - /// MachineInstrs. - class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { - const MachineLoopInfo &MLI; - const MachineDominatorTree &MDT; - const MachineFrameInfo *MFI; - const InstrItineraryData *InstrItins; - - /// isPostRA flag indicates vregs cannot be present. - bool IsPostRA; - - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - - DenseMap<MachineInstr*, SUnit*> MISUnitMap; - - /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using - /// the def-side latency only. - bool UnitLatencies; - - /// Combine a SparseSet with a 1x1 vector to track physical registers. - /// The SparseSet allows iterating over the (few) live registers for quickly - /// comparing against a regmask or clearing the set. - /// - /// Storage for the map is allocated once for the pass. The map can be - /// cleared between scheduling regions without freeing unused entries. - class Reg2SUnitsMap { - SparseSet<unsigned> PhysRegSet; - std::vector<std::vector<SUnit*> > SUnits; - public: - typedef SparseSet<unsigned>::const_iterator const_iterator; - - // Allow iteration over register numbers (keys) in the map. If needed, we - // can provide an iterator over SUnits (values) as well. - const_iterator reg_begin() const { return PhysRegSet.begin(); } - const_iterator reg_end() const { return PhysRegSet.end(); } - - /// Initialize the map with the number of registers. - /// If the map is already large enough, no allocation occurs. - /// For simplicity we expect the map to be empty(). - void setRegLimit(unsigned Limit); - - /// Returns true if the map is empty. - bool empty() const { return PhysRegSet.empty(); } - - /// Clear the map without deallocating storage. 
- void clear(); - - bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); } - - /// If this register is mapped, return its existing SUnits vector. - /// Otherwise map the register and return an empty SUnits vector. - std::vector<SUnit *> &operator[](unsigned Reg) { - bool New = PhysRegSet.insert(Reg).second; - assert((!New || SUnits[Reg].empty()) && "stale SUnits vector"); - (void)New; - return SUnits[Reg]; - } - - /// Erase an existing element without freeing memory. - void erase(unsigned Reg) { - PhysRegSet.erase(Reg); - SUnits[Reg].clear(); - } - }; - /// Defs, Uses - Remember where defs and uses of each register are as we - /// iterate upward through the instructions. This is allocated here instead - /// of inside BuildSchedGraph to avoid the need for it to be initialized and - /// destructed for each block. - Reg2SUnitsMap Defs; - Reg2SUnitsMap Uses; - - /// An individual mapping from virtual register number to SUnit. - struct VReg2SUnit { - unsigned VirtReg; - SUnit *SU; - - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} - - unsigned getSparseSetKey() const { - return TargetRegisterInfo::virtReg2Index(VirtReg); - } - }; - /// Use SparseSet as a SparseMap by relying on the fact that it never - /// compares ValueT's, only unsigned keys. This allows the set to be cleared - /// between scheduling regions in constant time as long as ValueT does not - /// require a destructor. - typedef SparseSet<VReg2SUnit> VReg2SUnitMap; - /// Track the last instructon in this region defining each virtual register. - VReg2SUnitMap VRegDefs; - - /// PendingLoads - Remember where unknown loads are after the most recent - /// unknown store, as we iterate. As with Defs and Uses, this is here - /// to minimize construction/destruction. - std::vector<SUnit *> PendingLoads; - - /// LoopRegs - Track which registers are used for loop-carried dependencies. - /// - LoopDependencies LoopRegs; - - protected: - - /// DbgValues - Remember instruction that preceeds DBG_VALUE. - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; - DbgValueVector DbgValues; - MachineInstr *FirstDbgValue; - - public: - MachineBasicBlock::iterator Begin; // The beginning of the range to - // be scheduled. The range extends - // to InsertPos. - unsigned InsertPosIndex; // The index in BB of InsertPos. - - explicit ScheduleDAGInstrs(MachineFunction &mf, - const MachineLoopInfo &mli, - const MachineDominatorTree &mdt, - bool IsPostRAFlag, - LiveIntervals *LIS = 0); - - virtual ~ScheduleDAGInstrs() {} - - /// NewSUnit - Creates a new SUnit and return a ptr to it. - /// - SUnit *NewSUnit(MachineInstr *MI) { -#ifndef NDEBUG - const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0]; -#endif - SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && - "SUnits std::vector reallocated on the fly!"); - SUnits.back().OrigNode = &SUnits.back(); - return &SUnits.back(); - } - - - /// Run - perform scheduling. - /// - void Run(MachineBasicBlock *bb, - MachineBasicBlock::iterator begin, - MachineBasicBlock::iterator end, - unsigned endindex); - - /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are - /// input. - virtual void BuildSchedGraph(AliasAnalysis *AA); - - /// AddSchedBarrierDeps - Add dependencies from instructions in the current - /// list of instructions being scheduled to scheduling barrier. 
We want to - /// make sure instructions which define registers that are either used by - /// the terminator or are live-out are properly scheduled. This is - /// especially important when the definition latency of the return value(s) - /// are too high to be hidden by the branch or when the liveout registers - /// used by instructions in the fallthrough block. - void AddSchedBarrierDeps(); - - /// ComputeLatency - Compute node latency. - /// - virtual void ComputeLatency(SUnit *SU); - - /// ComputeOperandLatency - Override dependence edge latency using - /// operand use/def information - /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, - SDep& dep) const; - - virtual MachineBasicBlock *EmitSchedule(); - - /// StartBlock - Prepare to perform scheduling in the given block. - /// - virtual void StartBlock(MachineBasicBlock *BB); - - /// Schedule - Order nodes according to selected style, filling - /// in the Sequence member. - /// - virtual void Schedule() = 0; - - /// FinishBlock - Clean up after scheduling in the given block. - /// - virtual void FinishBlock(); - - virtual void dumpNode(const SUnit *SU) const; - - virtual std::string getGraphNodeLabel(const SUnit *SU) const; - - protected: - SUnit *getSUnit(MachineInstr *MI) const { - DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI); - if (I == MISUnitMap.end()) - return 0; - return I->second; - } - - void initSUnits(); - void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO); - void addPhysRegDeps(SUnit *SU, unsigned OperIdx); - void addVRegDefDeps(SUnit *SU, unsigned OperIdx); - void addVRegUseDeps(SUnit *SU, unsigned OperIdx); - - VReg2SUnitMap::iterator findVRegDef(unsigned VirtReg) { - return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - }; -} - -#endif diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4251583..38feee9 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -41,12 +41,12 @@ namespace llvm { static bool renderGraphFromBottomUp() { return true; } - + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; } - + /// If you want to override the dot attributes printed for a particular /// edge, override this method. static std::string getEdgeAttributes(const SUnit *Node, @@ -58,7 +58,7 @@ namespace llvm { return "color=blue,style=dashed"; return ""; } - + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, @@ -81,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG /// rendered using 'dot'. /// -void ScheduleDAG::viewGraph() { -// This code is only for debugging! +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! #ifndef NDEBUG - if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName() + - ":" + BB->getBasicBlock()->getName()); - else - ViewGraph(this, "dag." + MF.getFunction()->getName(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getName()); + ViewGraph(this, Name, false, Title); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } + +/// Out-of-line implementation with no arguments is handy for gdb. 
+void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 9a79217..a6bdc3b 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp + SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp ScheduleDAGVLIW.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1b148ad..7c4db97 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -80,7 +80,7 @@ namespace { // visit, we pop off the order stack until we find an item that is // also in the contents set. All operations are O(log N). SmallPtrSet<SDNode*, 64> WorkListContents; - std::vector<SDNode*> WorkListOrder; + SmallVector<SDNode*, 64> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -381,6 +381,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// specified expression for the same cost as the expression itself, or 2 if we /// can compute the negated form more cheaply than the expression itself. static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth = 0) { // No compile time optimizations on this type. @@ -406,12 +407,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, // FIXME: determine better conditions for this xform. if (!Options->UnsafeFPMath) return 0; + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, - Depth + 1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
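The hunk above threads the TargetLowering through isNegatibleForFree so the combiner can refuse to synthesize an FSUB once operation legalization has run, and the trailing context line states why the -(A-B) => B-A rewrite is gated in the first place: it is not sign-correct for IEEE zeros. A minimal standalone sketch (ordinary C++, not LLVM code; the values are illustrative only) of the signed-zero difference:

#include <cmath>
#include <cstdio>

int main() {
  double A = 1.5, B = 1.5;        // A == B, both finite
  double neg_sub = -(A - B);      // A - B is +0.0, so this is -0.0
  double swapped = B - A;         // +0.0
  std::printf("signbit(-(A-B)) = %d, signbit(B-A) = %d\n",
              std::signbit(neg_sub), std::signbit(swapped));
  std::printf("1/-(A-B) = %g, 1/(B-A) = %g\n", 1.0 / neg_sub, 1.0 / swapped);
  return 0;
}

The two results compare equal with ==, but the sign bit (and anything derived from it, such as the reciprocal) differs, which is why the fold is only performed when signed zeros need not be honored.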
@@ -425,17 +431,17 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, if (Options->HonorSignDependentRoundingFPMath()) return 0; // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, - Depth + 1)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) return V; - return isNegatibleForFree(Op.getOperand(1), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, Depth + 1); case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), LegalOperations, Options, + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, Depth + 1); } } @@ -464,6 +470,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, @@ -493,6 +500,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, @@ -997,8 +1005,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // worklist *should* contain, and check the node we want to visit is should // actually be visited. do { - N = WorkListOrder.back(); - WorkListOrder.pop_back(); + N = WorkListOrder.pop_back_val(); } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. 
Make sure to revisit all N's operands once @@ -5507,11 +5514,13 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1CFP->getValueAPF().isZero()) return N0; // fold (fadd A, (fneg B)) -> (fsub A, B) - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) - if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options) == 2) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); @@ -5549,16 +5558,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub 0, B) -> -B if (DAG.getTarget().Options.UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // If 'unsafe math' is enabled, fold + // (fsub x, (fadd x, y)) -> (fneg y) & + // (fsub x, (fadd y, x)) -> (fneg y) + if (DAG.getTarget().Options.UnsafeFPMath) { + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + return SDValue(); } @@ -5568,6 +5594,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5598,9 +5625,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
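The new visitFSUB code above performs (fsub x, (fadd x, y)) -> (fneg y) only under unsafe FP math because the intermediate fadd rounds. A small self-contained sketch (plain C++, not LLVM code; the constants are chosen only to make the rounding visible) of a case where the folded and unfolded forms disagree:

#include <cstdio>

int main() {
  float x = 1e8f;                // adjacent floats near 1e8 are 8 apart
  float y = 1.0f;
  float unfolded = x - (x + y);  // x + y rounds back to x, so this is 0.0f
  float folded   = -y;           // the combined form: -1.0f
  std::printf("x - (x + y) = %g   -y = %g\n", unfolded, folded);
  return 0;
}

With exact arithmetic both expressions equal -y, so the rewrite is valid only when the optimizer may reassociate FP operations, which is what the UnsafeFPMath check guarding the new fold expresses.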
@@ -5628,6 +5655,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // fold vector ops if (VT.isVector()) { @@ -5641,9 +5669,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. @@ -5897,7 +5925,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - if (isNegatibleForFree(N0, LegalOperations, &DAG.getTarget().Options)) + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading @@ -6129,8 +6158,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that /// uses N as its base pointer and that N may be folded in the load / store -/// addressing mode. FIXME: This currently only looks for folding of -/// [reg +/- imm] addressing modes. +/// addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -6150,15 +6178,19 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) + // [reg +/- imm] AM.BaseOffs = Offset->getSExtValue(); else - return false; + // [reg +/- reg] + AM.Scale = 1; } else if (N->getOpcode() == ISD::SUB) { ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (Offset) + // [reg +/- imm] AM.BaseOffs = -Offset->getSExtValue(); else - return false; + // [reg +/- reg] + AM.Scale = 1; } else return false; @@ -7187,6 +7219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + if (InVec.getOpcode() == ISD::BITCAST) { // Don't duplicate a load with other uses. if (!InVec.hasOneUse()) @@ -7287,17 +7324,36 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Note that this replacement assumes that the extractvalue is the only // use of the load; that's okay because we don't want to perform this // transformation in other cases anyway. - SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + SDValue Load; + SDValue Chain; + if (NVT.bitsGT(LVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) + ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), + LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + Chain = Load.getValue(1); + if (NVT.bitsLT(LVT)) + Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load.getValue(0), Load.getValue(1) }; + SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); // Since we're explcitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too // Make sure to revisit this node to clean it up; it will usually be dead. AddToWorkList(N); return SDValue(N, 0); @@ -7367,6 +7423,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && ValidTypes) { bool isLE = TLI.isLittleEndian(); @@ -7407,6 +7465,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VecVT, &Ops[0], Ops.size()); + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorkList(BV.getNode()); // Bitcast to the desired type. return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); } @@ -7414,6 +7474,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + SDValue VecIn1, VecIn2; for (unsigned i = 0; i != NumInScalars; ++i) { // Ignore undef inputs. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index fd8ce78..9f4a44a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -136,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return 0; } - // Look up the value to see if we already have a register for it. We - // cache values defined by Instructions across blocks, and other values - // only locally. This is because Instructions already have the SSA - // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) - return I->second; - - unsigned Reg = LocalValueMap[V]; + // Look up the value to see if we already have a register for it. 
+ unsigned Reg = lookUpRegForValue(V); if (Reg != 0) return Reg; @@ -199,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); @@ -577,12 +570,16 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || - !FuncInfo.MF->getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } const Value *Address = DI->getAddress(); - if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address)) + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); return true; + } unsigned Reg = 0; unsigned Offset = 0; @@ -590,16 +587,25 @@ bool FastISel::SelectCall(const User *I) { // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI.getFrameRegister(*FuncInfo.MF); + Reg = TRI.getFrameRegister(*FuncInfo.MF); } if (!Reg) - Reg = getRegForValue(Address); + Reg = lookUpRegForValue(Address); + + if (!Reg && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Reg = FuncInfo.InitializeRegForValue(Address); if (Reg) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::DBG_VALUE)) .addReg(Reg, RegState::Debug).addImm(Offset) .addMetadata(DI->getVariable()); + else + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. 
+ DEBUG(dbgs() << "Dropping debug info for " << DI); return true; } case Intrinsic::dbg_value: { diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 31df458..1b84b13 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3032,6 +3032,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Results[0].getValue(1)); break; } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && + "Don't know how to expand this FP subtraction!"); + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + break; + } case ISD::SUB: { EVT VT = Node->getValueType(0); assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && @@ -3590,10 +3600,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FDIV: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0))); break; diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 1a27f3f..ff0136e 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -470,7 +470,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { /// Main resource tracking point. -void ResourcePriorityQueue::ScheduledNode(SUnit *SU) { +void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // Use NULL entry as an event marker to reset // the DFA state. if (!SU) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34ee1f3..24da432 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -43,7 +43,7 @@ namespace { SmallVector<SUnit *, 16> Queue; bool empty() const { return Queue.empty(); } - + void push(SUnit *U) { Queue.push_back(U); } @@ -101,8 +101,8 @@ private: bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); void ListScheduleBottomUp(); - /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. - bool ForceUnitLatencies() const { return true; } + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const { return true; } }; } // end anonymous namespace @@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. @@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { ReleasePred(SU, &*I); if (I->isAssignedRegDep()) { // This is a physical register dependency and it's impossible or - // expensive to copy the register. Make sure nothing that can + // expensive to copy the register. Make sure nothing that can // clobber the register is scheduled between the predecessor and // this node. 
if (!LiveRegDefs[I->getReg()]) { @@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), SDValue(LoadNode, 1)); - SUnit *NewSU = NewSUnit(N); + SUnit *NewSU = newSUnit(N); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NewSU->NodeNum); - + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { @@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { LoadSU = &SUnits[LoadNode->getNodeId()]; isNewLoad = false; } else { - LoadSU = NewSUnit(LoadNode); + LoadSU = newSUnit(LoadNode); LoadNode->setNodeId(LoadSU->NodeNum); } @@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(LoadSU); AddPred(SuccDep, D); } - } + } if (isNewLoad) { AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); } @@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector<SUnit*, 2> &Copies) { - SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL)); + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) { + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } @@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1017d36..f44adfc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -232,7 +232,7 @@ private: /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { unsigned NumSUnits = SUnits.size(); - SUnit *NewNode = NewSUnit(N); + SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) Topo.InitDAGTopologicalSorting(); @@ -250,9 +250,9 @@ private: return NewNode; } - /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't /// need actual latency information but the hybrid scheduler does. 
- bool ForceUnitLatencies() const { + bool forceUnitLatencies() const { return !NeedLatency; } }; @@ -327,6 +327,12 @@ void ScheduleDAGRRList::Schedule() { ListScheduleBottomUp(); AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -348,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { #endif --PredSU->NumSuccsLeft; - if (!ForceUnitLatencies()) { + if (!forceUnitLatencies()) { // Updating predecessor's height. This is now the cycle when the // predecessor can be scheduled without causing a pipeline stall. PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); @@ -695,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { Sequence.push_back(SU); - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); // If HazardRec is disabled, and each inst counts as one cycle, then // advance CurCycle before ReleasePredecessors to avoid useless pushes to @@ -842,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { else { AvailableQueue->push(SU); } - AvailableQueue->UnscheduledNode(SU); + AvailableQueue->unscheduledNode(SU); } /// After backtracking, the hazard checker needs to be restored to a state @@ -963,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { LoadNode->setNodeId(LoadSU->NodeNum); InitNumRegDefsLeft(LoadSU); - ComputeLatency(LoadSU); + computeLatency(LoadSU); } SUnit *NewSU = CreateNewSUnit(N); @@ -981,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { NewSU->isCommutable = true; InitNumRegDefsLeft(NewSU); - ComputeLatency(NewSU); + computeLatency(NewSU); // Record all the edges to and from the old SU, by category. 
SmallVector<SDep, 4> ChainPreds; @@ -1160,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); unsigned NumRes = MCID.getNumDefs(); - for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1286,7 +1292,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) + for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } @@ -1475,7 +1481,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/true); + VerifyScheduledSequence(/*isBottomUp=*/true); #endif } @@ -1681,9 +1687,9 @@ public: int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; - void ScheduledNode(SUnit *SU); + void scheduledNode(SUnit *SU); - void UnscheduledNode(SUnit *SU); + void unscheduledNode(SUnit *SU); protected: bool canClobber(const SUnit *SU, const SUnit *Op); @@ -1984,7 +1990,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { return PDiff; } -void RegReductionPQBase::ScheduledNode(SUnit *SU) { +void RegReductionPQBase::scheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2053,7 +2059,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) { dumpRegPressure(); } -void RegReductionPQBase::UnscheduledNode(SUnit *SU) { +void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (!TracksRegPressure) return; @@ -2661,7 +2667,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const unsigned *ImpDefs + const uint16_t *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2680,7 +2686,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). 
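The switch from const unsigned * to const uint16_t * in the hunks above (and in the earlier ScheduleDAGFast/ScheduleDAGRRList changes) only changes the element type of the implicit-def tables; the lists are still walked as 0-terminated arrays of register numbers. A toy illustration of the idiom (standalone C++; the register numbers are made up and do not correspond to any real target):

#include <cstdint>
#include <cstdio>

// Hypothetical implicit-def list: physical register numbers, 0-terminated.
static const uint16_t ImplicitDefs[] = { 3, 17, 42, 0 };

int main() {
  for (const uint16_t *ImpDef = ImplicitDefs; *ImpDef; ++ImpDef)
    std::printf("implicit def of reg %u\n", unsigned(*ImpDef));
  return 0;
}

Narrowing the element type shrinks the statically generated tables (16 bits comfortably holds a physical register number); the sentinel-terminated loop shape used throughout the diff is unchanged.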
@@ -2699,13 +2705,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const unsigned *SUImpDefs = + const uint16_t *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 71f07d6..69dd813 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -17,6 +17,8 @@ #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), + : ScheduleDAG(mf), BB(0), DAG(0), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// -void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos) { +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; DAG = dag; - ScheduleDAG::Run(bb, insertPos); + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); } /// NewSUnit - Creates a new SUnit and return a ptr to it. /// -SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG const SUnit *Addr = 0; if (!SUnits.empty()) @@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { } SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { - SUnit *SU = NewSUnit(Old->getNode()); + SUnit *SU = newSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; SU->isVRegCycle = Old->isVRegCycle; @@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // If this node has already been processed, stop now. if (NI->getNodeId() != -1) continue; - SUnit *NodeSUnit = NewSUnit(NI); + SUnit *NodeSUnit = newSUnit(NI); // See if anything is glued to this node, if so, add them to glued // nodes. Nodes can have at most one glue input and one glue output. Glue @@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { InitNumRegDefsLeft(NodeSUnit); // Assign the Latency field of NodeSUnit using target-provided information. - ComputeLatency(NodeSUnit); + computeLatency(NodeSUnit); } // Find all call operands. @@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); // Check to see if the scheduler cares about latencies. - bool UnitLatencies = ForceUnitLatencies(); + bool UnitLatencies = forceUnitLatencies(); // Pass 2: add the preds, succs, etc. 
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { - ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); + computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); } @@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } } -void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { SDNode *N = SU->getNode(); // TokenFactor operands are considered zero latency, and some schedulers @@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) { + if (forceUnitLatencies()) { SU->Latency = 1; return; } @@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency += TII->getInstrLatency(InstrItins, N); } -void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const{ // Check to see if the scheduler cares about latencies. - if (ForceUnitLatencies()) + if (forceUnitLatencies()) return; if (dep.getKind() != SDep::Data) @@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { } } +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + namespace { struct OrderSorter { bool operator()(const std::pair<unsigned, MachineInstr*> &A, @@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); } +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. 
+ assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} -/// EmitSchedule - Emit the machine code in scheduled order. -MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; DenseMap<SUnit*, unsigned> CopyVRBaseMap; @@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SUnit *SU = Sequence[i]; if (!SU) { // Null SUnit* is a noop. - EmitNoop(); + TII->insertNoop(*Emitter.getBlock(), InsertPos); continue; } @@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // SDNode and any glued SDNodes and append them to the block. if (!SU->getNode()) { // Emit a copy. - EmitPhysRegCopy(SU, CopyVRBaseMap); + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); continue; } @@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; while (DI != DE) { - MachineBasicBlock *InsertBB = Emitter.getBlock(); - MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); - if (!(*DI)->isInvalidated()) { - MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); - if (DbgMI) - InsertBB->insert(Pos, DbgMI); - } + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); ++DI; } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); } - BB = Emitter.getBlock(); InsertPos = Emitter.getInsertPos(); - return BB; + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." + BB->getFullName(); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 17b4901..75940ec 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -35,17 +35,20 @@ namespace llvm { /// class ScheduleDAGSDNodes : public ScheduleDAG { public: + MachineBasicBlock *BB; SelectionDAG *DAG; // DAG of the current basic block const InstrItineraryData *InstrItins; + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + explicit ScheduleDAGSDNodes(MachineFunction &mf); virtual ~ScheduleDAGSDNodes() {} /// Run - perform scheduling. /// - void Run(SelectionDAG *dag, MachineBasicBlock *bb, - MachineBasicBlock::iterator insertPos); + void Run(SelectionDAG *dag, MachineBasicBlock *bb); /// isPassiveNode - Return true if the node is a non-scheduled leaf. 
/// @@ -68,7 +71,7 @@ namespace llvm { /// NewSUnit - Creates a new SUnit and return a ptr to it. /// - SUnit *NewSUnit(SDNode *N); + SUnit *newSUnit(SDNode *N); /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. @@ -79,7 +82,7 @@ namespace llvm { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - virtual void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AliasAnalysis *AA); /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is /// CopyToReg and its only active data operands are CopyFromReg within a @@ -91,30 +94,41 @@ namespace llvm { /// void InitNumRegDefsLeft(SUnit *SU); - /// ComputeLatency - Compute node latency. + /// computeLatency - Compute node latency. /// - virtual void ComputeLatency(SUnit *SU); + virtual void computeLatency(SUnit *SU); - /// ComputeOperandLatency - Override dependence edge latency using + /// computeOperandLatency - Override dependence edge latency using /// operand use/def information /// - virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + virtual void computeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { } - virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, unsigned OpIdx, SDep& dep) const; - virtual MachineBasicBlock *EmitSchedule(); - /// Schedule - Order nodes according to selected style, filling /// in the Sequence member. /// virtual void Schedule() = 0; + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos); + virtual void dumpNode(const SUnit *SU) const; + void dumpSchedule() const; + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + virtual std::string getDAGName() const; + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; /// RegDefIter - In place iteration over the values defined by an @@ -160,6 +174,9 @@ namespace llvm { /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); }; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 7d12509..c851291 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -158,7 +158,7 @@ void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { releaseSuccessors(SU); SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); + AvailableQueue->scheduledNode(SU); } /// listScheduleTopDown - The main loop of list scheduling for top-down @@ -202,7 +202,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // don't advance the hazard recognizer. if (AvailableQueue->empty()) { // Reset DFA state. 
- AvailableQueue->ScheduledNode(0); + AvailableQueue->scheduledNode(0); ++CurCycle; continue; } @@ -261,7 +261,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { } #ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); + VerifyScheduledSequence(/*isBottomUp=*/false); #endif } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 796abf4..e3a7305 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -124,20 +124,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { if (i == e) return false; // Do not accept build_vectors that aren't all constants or which have non-~0 - // elements. + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); if (isa<ConstantSDNode>(NotZero)) { - if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue()) + if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() < + EltSize) return false; } else if (isa<ConstantFPSDNode>(NotZero)) { - if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF(). - bitcastToAPInt().isAllOnesValue()) + if (cast<ConstantFPSDNode>(NotZero)->getValueAPF() + .bitcastToAPInt().countTrailingOnes() < EltSize) return false; } else return false; // Okay, we have at least one ~0 value, check to see if the rest match or are - // undefs. + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. 
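// [Editor's note, not part of the patch] A minimal sketch of why the check
// above compares countTrailingOnes() against the element width instead of
// calling isAllOnesValue(): after type legalization the operands of a
// BUILD_VECTOR may be wider than the vector's element type, e.g. a <8 x i8>
// build_vector whose operands are i32 constants. Assuming only APInt from
// llvm/ADT/APInt.h, the intent of the new test is roughly:
//
//   static bool isAllOnesForElement(const APInt &V, unsigned EltSize) {
//     // Only the low EltSize bits belong to the vector element.
//     return V.countTrailingOnes() >= EltSize;
//   }
//
//   // isAllOnesForElement(APInt(32, 0xFF), 8)  -> true  (i8 element carried in an i32 constant)
//   // isAllOnesForElement(APInt(32, 0xFF), 32) -> false (genuinely not all ones as an i32)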
for (++i; i != e; ++i) if (N->getOperand(i) != NotZero && N->getOperand(i).getOpcode() != ISD::UNDEF) @@ -5904,571 +5913,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } -std::string SDNode::getOperationName(const SelectionDAG *G) const { - switch (getOpcode()) { - default: - if (getOpcode() < ISD::BUILTIN_OP_END) - return "<<Unknown DAG Node>>"; - if (isMachineOpcode()) { - if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) - if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->getName(getMachineOpcode()); - return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; - } - if (G) { - const TargetLowering &TLI = G->getTargetLoweringInfo(); - const char *Name = TLI.getTargetNodeName(getOpcode()); - if (Name) return Name; - return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; - } - return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; - -#ifndef NDEBUG - case ISD::DELETED_NODE: - return "<<Deleted Node!>>"; -#endif - case ISD::PREFETCH: return "Prefetch"; - case ISD::MEMBARRIER: return "MemBarrier"; - case ISD::ATOMIC_FENCE: return "AtomicFence"; - case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; - case ISD::ATOMIC_SWAP: return "AtomicSwap"; - case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; - case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; - case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; - case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; - case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; - case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; - case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; - case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; - case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; - case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; - case ISD::ATOMIC_LOAD: return "AtomicLoad"; - case ISD::ATOMIC_STORE: return "AtomicStore"; - case ISD::PCMARKER: return "PCMarker"; - case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; - case ISD::SRCVALUE: return "SrcValue"; - case ISD::MDNODE_SDNODE: return "MDNode"; - case ISD::EntryToken: return "EntryToken"; - case ISD::TokenFactor: return "TokenFactor"; - case ISD::AssertSext: return "AssertSext"; - case ISD::AssertZext: return "AssertZext"; - - case ISD::BasicBlock: return "BasicBlock"; - case ISD::VALUETYPE: return "ValueType"; - case ISD::Register: return "Register"; - case ISD::RegisterMask: return "RegisterMask"; - case ISD::Constant: return "Constant"; - case ISD::ConstantFP: return "ConstantFP"; - case ISD::GlobalAddress: return "GlobalAddress"; - case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; - case ISD::FrameIndex: return "FrameIndex"; - case ISD::JumpTable: return "JumpTable"; - case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; - case ISD::RETURNADDR: return "RETURNADDR"; - case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; - case ISD::EH_RETURN: return "EH_RETURN"; - case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; - case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; - case ISD::ConstantPool: return "ConstantPool"; - case ISD::ExternalSymbol: return "ExternalSymbol"; - case ISD::BlockAddress: return "BlockAddress"; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: { - unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 
0 : 1; - unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); - if (IID < Intrinsic::num_intrinsics) - return Intrinsic::getName((Intrinsic::ID)IID); - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) - return TII->getName(IID); - llvm_unreachable("Invalid intrinsic ID"); - } - - case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; - case ISD::TargetConstant: return "TargetConstant"; - case ISD::TargetConstantFP:return "TargetConstantFP"; - case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; - case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; - case ISD::TargetFrameIndex: return "TargetFrameIndex"; - case ISD::TargetJumpTable: return "TargetJumpTable"; - case ISD::TargetConstantPool: return "TargetConstantPool"; - case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; - case ISD::TargetBlockAddress: return "TargetBlockAddress"; - - case ISD::CopyToReg: return "CopyToReg"; - case ISD::CopyFromReg: return "CopyFromReg"; - case ISD::UNDEF: return "undef"; - case ISD::MERGE_VALUES: return "merge_values"; - case ISD::INLINEASM: return "inlineasm"; - case ISD::EH_LABEL: return "eh_label"; - case ISD::HANDLENODE: return "handlenode"; - - // Unary operators - case ISD::FABS: return "fabs"; - case ISD::FNEG: return "fneg"; - case ISD::FSQRT: return "fsqrt"; - case ISD::FSIN: return "fsin"; - case ISD::FCOS: return "fcos"; - case ISD::FTRUNC: return "ftrunc"; - case ISD::FFLOOR: return "ffloor"; - case ISD::FCEIL: return "fceil"; - case ISD::FRINT: return "frint"; - case ISD::FNEARBYINT: return "fnearbyint"; - case ISD::FEXP: return "fexp"; - case ISD::FEXP2: return "fexp2"; - case ISD::FLOG: return "flog"; - case ISD::FLOG2: return "flog2"; - case ISD::FLOG10: return "flog10"; - - // Binary operators - case ISD::ADD: return "add"; - case ISD::SUB: return "sub"; - case ISD::MUL: return "mul"; - case ISD::MULHU: return "mulhu"; - case ISD::MULHS: return "mulhs"; - case ISD::SDIV: return "sdiv"; - case ISD::UDIV: return "udiv"; - case ISD::SREM: return "srem"; - case ISD::UREM: return "urem"; - case ISD::SMUL_LOHI: return "smul_lohi"; - case ISD::UMUL_LOHI: return "umul_lohi"; - case ISD::SDIVREM: return "sdivrem"; - case ISD::UDIVREM: return "udivrem"; - case ISD::AND: return "and"; - case ISD::OR: return "or"; - case ISD::XOR: return "xor"; - case ISD::SHL: return "shl"; - case ISD::SRA: return "sra"; - case ISD::SRL: return "srl"; - case ISD::ROTL: return "rotl"; - case ISD::ROTR: return "rotr"; - case ISD::FADD: return "fadd"; - case ISD::FSUB: return "fsub"; - case ISD::FMUL: return "fmul"; - case ISD::FDIV: return "fdiv"; - case ISD::FMA: return "fma"; - case ISD::FREM: return "frem"; - case ISD::FCOPYSIGN: return "fcopysign"; - case ISD::FGETSIGN: return "fgetsign"; - case ISD::FPOW: return "fpow"; - - case ISD::FPOWI: return "fpowi"; - case ISD::SETCC: return "setcc"; - case ISD::SELECT: return "select"; - case ISD::VSELECT: return "vselect"; - case ISD::SELECT_CC: return "select_cc"; - case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; - case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; - case ISD::CONCAT_VECTORS: return "concat_vectors"; - case ISD::INSERT_SUBVECTOR: return "insert_subvector"; - case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; - case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; - case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; - case ISD::CARRY_FALSE: return "carry_false"; - case ISD::ADDC: return "addc"; - case ISD::ADDE: return "adde"; - case ISD::SADDO: return "saddo"; 
- case ISD::UADDO: return "uaddo"; - case ISD::SSUBO: return "ssubo"; - case ISD::USUBO: return "usubo"; - case ISD::SMULO: return "smulo"; - case ISD::UMULO: return "umulo"; - case ISD::SUBC: return "subc"; - case ISD::SUBE: return "sube"; - case ISD::SHL_PARTS: return "shl_parts"; - case ISD::SRA_PARTS: return "sra_parts"; - case ISD::SRL_PARTS: return "srl_parts"; - - // Conversion operators. - case ISD::SIGN_EXTEND: return "sign_extend"; - case ISD::ZERO_EXTEND: return "zero_extend"; - case ISD::ANY_EXTEND: return "any_extend"; - case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; - case ISD::TRUNCATE: return "truncate"; - case ISD::FP_ROUND: return "fp_round"; - case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; - case ISD::FP_EXTEND: return "fp_extend"; - - case ISD::SINT_TO_FP: return "sint_to_fp"; - case ISD::UINT_TO_FP: return "uint_to_fp"; - case ISD::FP_TO_SINT: return "fp_to_sint"; - case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bitcast"; - case ISD::FP16_TO_FP32: return "fp16_to_fp32"; - case ISD::FP32_TO_FP16: return "fp32_to_fp16"; - - case ISD::CONVERT_RNDSAT: { - switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { - default: llvm_unreachable("Unknown cvt code!"); - case ISD::CVT_FF: return "cvt_ff"; - case ISD::CVT_FS: return "cvt_fs"; - case ISD::CVT_FU: return "cvt_fu"; - case ISD::CVT_SF: return "cvt_sf"; - case ISD::CVT_UF: return "cvt_uf"; - case ISD::CVT_SS: return "cvt_ss"; - case ISD::CVT_SU: return "cvt_su"; - case ISD::CVT_US: return "cvt_us"; - case ISD::CVT_UU: return "cvt_uu"; - } - } - - // Control flow instructions - case ISD::BR: return "br"; - case ISD::BRIND: return "brind"; - case ISD::BR_JT: return "br_jt"; - case ISD::BRCOND: return "brcond"; - case ISD::BR_CC: return "br_cc"; - case ISD::CALLSEQ_START: return "callseq_start"; - case ISD::CALLSEQ_END: return "callseq_end"; - - // Other operators - case ISD::LOAD: return "load"; - case ISD::STORE: return "store"; - case ISD::VAARG: return "vaarg"; - case ISD::VACOPY: return "vacopy"; - case ISD::VAEND: return "vaend"; - case ISD::VASTART: return "vastart"; - case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; - case ISD::EXTRACT_ELEMENT: return "extract_element"; - case ISD::BUILD_PAIR: return "build_pair"; - case ISD::STACKSAVE: return "stacksave"; - case ISD::STACKRESTORE: return "stackrestore"; - case ISD::TRAP: return "trap"; - - // Bit manipulation - case ISD::BSWAP: return "bswap"; - case ISD::CTPOP: return "ctpop"; - case ISD::CTTZ: return "cttz"; - case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; - case ISD::CTLZ: return "ctlz"; - case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - - // Trampolines - case ISD::INIT_TRAMPOLINE: return "init_trampoline"; - case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; - - case ISD::CONDCODE: - switch (cast<CondCodeSDNode>(this)->get()) { - default: llvm_unreachable("Unknown setcc condition!"); - case ISD::SETOEQ: return "setoeq"; - case ISD::SETOGT: return "setogt"; - case ISD::SETOGE: return "setoge"; - case ISD::SETOLT: return "setolt"; - case ISD::SETOLE: return "setole"; - case ISD::SETONE: return "setone"; - - case ISD::SETO: return "seto"; - case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; - case ISD::SETUGT: return "setugt"; - case ISD::SETUGE: return "setuge"; - case ISD::SETULT: return "setult"; - case ISD::SETULE: return "setule"; - case ISD::SETUNE: return "setune"; - - case ISD::SETEQ: return "seteq"; - case ISD::SETGT: return 
"setgt"; - case ISD::SETGE: return "setge"; - case ISD::SETLT: return "setlt"; - case ISD::SETLE: return "setle"; - case ISD::SETNE: return "setne"; - - case ISD::SETTRUE: return "settrue"; - case ISD::SETTRUE2: return "settrue2"; - case ISD::SETFALSE: return "setfalse"; - case ISD::SETFALSE2: return "setfalse2"; - } - } -} - -const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { - switch (AM) { - default: - return ""; - case ISD::PRE_INC: - return "<pre-inc>"; - case ISD::PRE_DEC: - return "<pre-dec>"; - case ISD::POST_INC: - return "<post-inc>"; - case ISD::POST_DEC: - return "<post-dec>"; - } -} - -std::string ISD::ArgFlagsTy::getArgFlagsString() { - std::string S = "< "; - - if (isZExt()) - S += "zext "; - if (isSExt()) - S += "sext "; - if (isInReg()) - S += "inreg "; - if (isSRet()) - S += "sret "; - if (isByVal()) - S += "byval "; - if (isNest()) - S += "nest "; - if (getByValAlign()) - S += "byval-align:" + utostr(getByValAlign()) + " "; - if (getOrigAlign()) - S += "orig-align:" + utostr(getOrigAlign()) + " "; - if (getByValSize()) - S += "byval-size:" + utostr(getByValSize()) + " "; - return S + ">"; -} - -void SDNode::dump() const { dump(0); } -void SDNode::dump(const SelectionDAG *G) const { - print(dbgs(), G); - dbgs() << '\n'; -} - -void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (void*)this << ": "; - - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { - if (i) OS << ","; - if (getValueType(i) == MVT::Other) - OS << "ch"; - else - OS << getValueType(i).getEVTString(); - } - OS << " = " << getOperationName(G); -} - -void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { - if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { - if (!MN->memoperands_empty()) { - OS << "<"; - OS << "Mem:"; - for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), - e = MN->memoperands_end(); i != e; ++i) { - OS << **i; - if (llvm::next(i) != e) - OS << " "; - } - OS << ">"; - } - } else if (const ShuffleVectorSDNode *SVN = - dyn_cast<ShuffleVectorSDNode>(this)) { - OS << "<"; - for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { - int Idx = SVN->getMaskElt(i); - if (i) OS << ","; - if (Idx < 0) - OS << "u"; - else - OS << Idx; - } - OS << ">"; - } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { - OS << '<' << CSDN->getAPIntValue() << '>'; - } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) - OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) - OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; - else { - OS << "<APFloat("; - CSDN->getValueAPF().bitcastToAPInt().dump(); - OS << ")>"; - } - } else if (const GlobalAddressSDNode *GADN = - dyn_cast<GlobalAddressSDNode>(this)) { - int64_t offset = GADN->getOffset(); - OS << '<'; - WriteAsOperand(OS, GADN->getGlobal()); - OS << '>'; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = GADN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { - OS << "<" << FIDN->getIndex() << ">"; - } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { - OS << "<" << JTDN->getIndex() << ">"; - if (unsigned int TF = JTDN->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const ConstantPoolSDNode *CP = 
dyn_cast<ConstantPoolSDNode>(this)){ - int offset = CP->getOffset(); - if (CP->isMachineConstantPoolEntry()) - OS << "<" << *CP->getMachineCPVal() << ">"; - else - OS << "<" << *CP->getConstVal() << ">"; - if (offset > 0) - OS << " + " << offset; - else - OS << " " << offset; - if (unsigned int TF = CP->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { - OS << "<"; - const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); - if (LBB) - OS << LBB->getName() << " "; - OS << (const void*)BBDN->getBasicBlock() << ">"; - } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); - } else if (const ExternalSymbolSDNode *ES = - dyn_cast<ExternalSymbolSDNode>(this)) { - OS << "'" << ES->getSymbol() << "'"; - if (unsigned int TF = ES->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { - if (M->getValue()) - OS << "<" << M->getValue() << ">"; - else - OS << "<null>"; - } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { - if (MD->getMD()) - OS << "<" << MD->getMD() << ">"; - else - OS << "<null>"; - } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { - OS << ":" << N->getVT().getEVTString(); - } - else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); - - bool doExt = true; - switch (LD->getExtensionType()) { - default: doExt = false; break; - case ISD::EXTLOAD: OS << ", anyext"; break; - case ISD::SEXTLOAD: OS << ", sext"; break; - case ISD::ZEXTLOAD: OS << ", zext"; break; - } - if (doExt) - OS << " from " << LD->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(LD->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); - - if (ST->isTruncatingStore()) - OS << ", trunc to " << ST->getMemoryVT().getEVTString(); - - const char *AM = getIndexedModeName(ST->getAddressingMode()); - if (*AM) - OS << ", " << AM; - - OS << ">"; - } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; - } else if (const BlockAddressSDNode *BA = - dyn_cast<BlockAddressSDNode>(this)) { - OS << "<"; - WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); - OS << ", "; - WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); - OS << ">"; - if (unsigned int TF = BA->getTargetFlags()) - OS << " [TF=" << TF << ']'; - } - - if (G) - if (unsigned Order = G->GetOrdering(this)) - OS << " [ORD=" << Order << ']'; - - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; - - DebugLoc dl = getDebugLoc(); - if (G && !dl.isUnknown()) { - DIScope - Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); - OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << "<unknown>"; - OS << ':' << dl.getLine(); - if (dl.getCol() != 0) - OS << ':' << dl.getCol(); - } -} - -void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; - } - print_details(OS, G); -} - -static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, - const SelectionDAG *G, unsigned depth, - unsigned indent) { - if (depth == 0) - return; - - OS.indent(indent); - - N->print(OS, G); - - if (depth < 1) - return; - - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - // Don't follow chain operands. - if (N->getOperand(i).getValueType() == MVT::Other) - continue; - OS << '\n'; - printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); - } -} - -void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, - unsigned depth) const { - printrWithDepthHelper(OS, this, G, depth, 0); -} - -void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { - // Don't print impossibly deep things. - printrWithDepth(OS, G, 10); -} - -void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { - printrWithDepth(dbgs(), G, depth); -} - -void SDNode::dumprFull(const SelectionDAG *G) const { - // Don't print impossibly deep things. - dumprWithDepth(G, 10); -} - -static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getNode()->hasOneUse()) - DumpNodes(N->getOperand(i).getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)N->getOperand(i).getNode() << ": <multiple use>"; - - - dbgs() << "\n"; - dbgs().indent(indent); - N->dump(G); -} - SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6625,74 +6069,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } -void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; - - for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); - I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) - DumpNodes(N, 2, this); - } - - if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); - - dbgs() << "\n\n"; -} - -void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); - print_details(OS, G); -} - -typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; -static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, - const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. - return; - - // Dump the current SDNode, but don't end the line yet. - OS.indent(indent); - N->printr(OS, G); - - // Having printed this SDNode, walk the children: - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - - if (i) OS << ","; - OS << " "; - - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. 
- OS << (void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } - } - - OS << "\n"; - - // Dump children that have grandchildren on their own line(s). - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - DumpNodesr(OS, child, indent+2, G, once); - } -} - -void SDNode::dumpr() const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); -} - -void SDNode::dumpr(const SelectionDAG *G) const { - VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, G, once); -} - - // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4e4aa11..2ac9655 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2411,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 0; i < SI.getNumCases(); ++i) { - BasicBlock *SuccBB = SI.getCaseSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getCaseValue(i), - SI.getCaseValue(i), + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -4561,8 +4561,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) + if (!Address || !DIVariable(Variable).Verify()) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; + } // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder // but do not always have a corresponding SDNode built. The SDNodeOrder diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000..f981afb --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,631 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. 
+// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; + case ISD::LSDAADDR: 
return "LSDAADDR"; + case ISD::EHSELECTION: return "EHSELECTION"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case 
ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + 
case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +void SDNode::dump() const { dump(0); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + OS << (void*)this << ": "; + + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } + OS << " = " << getOperationName(G); +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + WriteAsOperand(OS, GADN->getGlobal()); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << ">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = 
dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. 
+ if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << "<unknown>"; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": <multiple use>"; + + dbgs() << '\n'; + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + + if (i) OS << ","; + OS << " "; + + if (child->getNumOperands() == 0) { + // This child has no grandchildren; print it inline right here. + child->printr(OS, G); + once.insert(child); + } else { // Just the address. FIXME: also print the child's opcode. + OS << (void*)child; + if (unsigned RN = N->getOperand(i).getResNo()) + OS << ":" << RN; + } + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDNode *child = N->getOperand(i).getNode(); + DumpNodesr(OS, child, indent+2, G, once); + } +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, 0, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. 
+ dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2173d8d..8aabc02 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -673,7 +673,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Scheduling", GroupName, TimePassesIsEnabled); - Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); + Scheduler->Run(CurDAG, FuncInfo->MBB); } if (ViewSUnitDAGs) Scheduler->viewGraph(); @@ -684,8 +684,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); - LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(); - FuncInfo->InsertPt = Scheduler->InsertPos; + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); } // If the block was split, make sure we update any references that are used to @@ -774,7 +775,7 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - + const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); @@ -934,9 +935,9 @@ static void collectFailStats(const Instruction *I) { case Instruction::FPToSI: NumFastIselFailFPToSI++; return; case Instruction::UIToFP: NumFastIselFailUIToFP++; return; case Instruction::SIToFP: NumFastIselFailSIToFP++; return; - case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; - case Instruction::BitCast: NumFastIselFailBitCast++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; // Other instructions... 
case Instruction::ICmp: NumFastIselFailICmp++; return; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 5412c97..9a86f32 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -1,4 +1,4 @@ -//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===// +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// // // The LLVM Compiler Infrastructure // @@ -42,7 +42,7 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPass : public FunctionPass { + class SjLjEHPrepare : public FunctionPass { const TargetLowering *TLI; Type *FunctionContextTy; Constant *RegisterFn; @@ -58,7 +58,7 @@ namespace { AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPass(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLowering *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -79,15 +79,15 @@ namespace { }; } // end anonymous namespace -char SjLjEHPass::ID = 0; +char SjLjEHPrepare::ID = 0; -// Public Interface To the SjLjEHPass pass. -FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) { - return new SjLjEHPass(TLI); +// Public Interface To the SjLjEHPrepare pass. +FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { + return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process // exceptions. -bool SjLjEHPass::doInitialization(Module &M) { +bool SjLjEHPrepare::doInitialization(Module &M) { // Build the function context structure. // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); @@ -123,7 +123,7 @@ bool SjLjEHPass::doInitialization(Module &M) { /// insertCallSiteStore - Insert a store of the call-site value to the /// function context -void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number) { +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { IRBuilder<> Builder(I); // Get a reference to the call_site field. @@ -151,8 +151,8 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// substituteLPadValues - Substitute the values returned by the landingpad /// instruction with those returned by the personality function. -void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal) { +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); @@ -183,7 +183,7 @@ void SjLjEHPass::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPass:: +Value *SjLjEHPrepare:: setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { BasicBlock *EntryBB = F.begin(); @@ -251,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) { /// specially, we lower each arg to a copy instruction in the entry block. This /// ensures that the argument value itself cannot be live out of the entry /// block. 
-void SjLjEHPass::lowerIncomingArguments(Function &F) { +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) @@ -295,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. -void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, - ArrayRef<InvokeInst*> Invokes) { +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst*> Invokes) { // Finally, scan the code looking for instructions with bad live ranges. for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { @@ -393,7 +393,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. -bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; SmallSetVector<LandingPadInst*, 16> LPads; @@ -519,7 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { return true; } -bool SjLjEHPass::runOnFunction(Function &F) { +bool SjLjEHPrepare::runOnFunction(Function &F) { bool Res = setupEntryBlockAndCallSites(F); return Res; } diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index 58caae8..cb11bfe 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -1,3 +1,5 @@ + + add_llvm_library(LLVMExecutionEngine ExecutionEngine.cpp ExecutionEngineBindings.cpp @@ -8,3 +10,11 @@ add_subdirectory(Interpreter) add_subdirectory(JIT) add_subdirectory(MCJIT) add_subdirectory(RuntimeDyld) + +if( LLVM_USE_OPROFILE ) + add_subdirectory(OProfileJIT) +endif( LLVM_USE_OPROFILE ) + +if( LLVM_USE_INTEL_JITEVENTS ) + add_subdirectory(IntelJITEvents) +endif( LLVM_USE_INTEL_JITEVENTS ) diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h new file mode 100644 index 0000000..1c07c94 --- /dev/null +++ b/lib/ExecutionEngine/EventListenerCommon.h @@ -0,0 +1,67 @@ +//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common functionality for JITEventListener implementations +// +//===----------------------------------------------------------------------===// + +#ifndef EVENT_LISTENER_COMMON_H +#define EVENT_LISTENER_COMMON_H + +#include "llvm/Metadata.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/Path.h" + +namespace llvm { + +namespace jitprofiling { + +class FilenameCache { + // Holds the filename of each Scope, so that we can pass a null-terminated + // string into oprofile. Use an AssertingVH rather than a ValueMap because we + // shouldn't be modifying any MDNodes while this map is alive. 
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames; + DenseMap<AssertingVH<MDNode>, std::string> Paths; + + public: + const char *getFilename(MDNode *Scope) { + std::string &Filename = Filenames[Scope]; + if (Filename.empty()) { + DIScope DIScope(Scope); + Filename = DIScope.getFilename(); + } + return Filename.c_str(); + } + + const char *getFullPath(MDNode *Scope) { + std::string &P = Paths[Scope]; + if (P.empty()) { + DIScope DIScope(Scope); + StringRef DirName = DIScope.getDirectory(); + StringRef FileName = DIScope.getFilename(); + SmallString<256> FullPath; + if (DirName != "." && DirName != "") { + FullPath = DirName; + } + if (FileName != "") { + sys::path::append(FullPath, FileName); + } + P = FullPath.str(); + } + return P.c_str(); + } +}; + +} // namespace jitprofiling + +} // namespace llvm + +#endif //EVENT_LISTENER_COMMON_H diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt new file mode 100644 index 0000000..7d67d0d --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -0,0 +1,11 @@ + +include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +set(system_libs + ${system_libs} + jitprofiling + ) + +add_llvm_library(LLVMIntelJITEvents + IntelJITEventListener.cpp + ) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp new file mode 100644 index 0000000..5dfa78f --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -0,0 +1,183 @@ +//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITEventListener object to tell Intel(R) VTune(TM) +// Amplifier XE 2011 about JITted functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/JITEventListener.h" + +#define DEBUG_TYPE "amplifier-jit-event-listener" +#include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Errno.h" +#include "llvm/Support/ValueHandle.h" +#include "EventListenerCommon.h" + +using namespace llvm; +using namespace llvm::jitprofiling; + +namespace { + +class IntelJITEventListener : public JITEventListener { + typedef DenseMap<void*, unsigned int> MethodIDMap; + + IntelJITEventsWrapper& Wrapper; + MethodIDMap MethodIDs; + FilenameCache Filenames; + +public: + IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper) + : Wrapper(libraryWrapper) { + } + + ~IntelJITEventListener() { + } + + virtual void NotifyFunctionEmitted(const Function &F, + void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details); + + virtual void NotifyFreeingMachineCode(void *OldPtr); +}; + +static LineNumberInfo LineStartToIntelJITFormat( + uintptr_t StartAddress, + uintptr_t Address, + DebugLoc Loc) { + LineNumberInfo Result; + + Result.Offset = Address - StartAddress; + Result.LineNumber = Loc.getLine(); + + return Result; +} + +static iJIT_Method_Load FunctionDescToIntelJITFormat( + IntelJITEventsWrapper& Wrapper, + const char* FnName, + uintptr_t FnStart, + size_t FnSize) { + iJIT_Method_Load Result; + memset(&Result, 0, sizeof(iJIT_Method_Load)); + + Result.method_id = Wrapper.iJIT_GetNewMethodID(); + Result.method_name = const_cast<char*>(FnName); + Result.method_load_address = reinterpret_cast<void*>(FnStart); + Result.method_size = FnSize; + + Result.class_id = 0; + Result.class_file_name = NULL; + Result.user_data = NULL; + Result.user_data_size = 0; + Result.env = iJDE_JittingAPI; + + return Result; +} + +// Adds the just-emitted function to the symbol table. +void IntelJITEventListener::NotifyFunctionEmitted( + const Function &F, void *FnStart, size_t FnSize, + const EmittedFunctionDetails &Details) { + iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper, + F.getName().data(), + reinterpret_cast<uint64_t>(FnStart), + FnSize); + + std::vector<LineNumberInfo> LineInfo; + + if (!Details.LineStarts.empty()) { + // Now convert the line number information from the address/DebugLoc + // format in Details to the offset/lineno in Intel JIT API format. 
+ + LineInfo.reserve(Details.LineStarts.size() + 1); + + DebugLoc FirstLoc = Details.LineStarts[0].Loc; + assert(!FirstLoc.isUnknown() + && "LineStarts should not contain unknown DebugLocs"); + + MDNode *FirstLocScope = FirstLoc.getScope(F.getContext()); + DISubprogram FunctionDI = getDISubprogram(FirstLocScope); + if (FunctionDI.Verify()) { + FunctionMessage.source_file_name = const_cast<char*>( + Filenames.getFullPath(FirstLocScope)); + + LineNumberInfo FirstLine; + FirstLine.Offset = 0; + FirstLine.LineNumber = FunctionDI.getLineNumber(); + LineInfo.push_back(FirstLine); + } + + for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I = + Details.LineStarts.begin(), E = Details.LineStarts.end(); + I != E; ++I) { + // This implementation ignores the DebugLoc filename because the Intel + // JIT API does not support multiple source files associated with a single + // JIT function + LineInfo.push_back(LineStartToIntelJITFormat( + reinterpret_cast<uintptr_t>(FnStart), + I->Address, + I->Loc)); + + // If we have no file name yet for the function, use the filename from + // the first instruction that has one + if (FunctionMessage.source_file_name == 0) { + MDNode *scope = I->Loc.getScope( + Details.MF->getFunction()->getContext()); + FunctionMessage.source_file_name = const_cast<char*>( + Filenames.getFullPath(scope)); + } + } + + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } else { + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } + + Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); + MethodIDs[FnStart] = FunctionMessage.method_id; +} + +void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { + MethodIDMap::iterator I = MethodIDs.find(FnStart); + if (I != MethodIDs.end()) { + Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second); + MethodIDs.erase(I); + } +} + +} // anonymous namespace. + +namespace llvm { +JITEventListener *JITEventListener::createIntelJITEventListener() { + static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper( + new IntelJITEventsWrapper); + return new IntelJITEventListener(*JITProfilingWrapper); +} + +// for testing +JITEventListener *JITEventListener::createIntelJITEventListener( + IntelJITEventsWrapper* TestImpl) { + return new IntelJITEventListener(*TestImpl); +} + +} // namespace llvm + diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt new file mode 100644 index 0000000..80d2273 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/JITProfileAmplifier/LLVMBuild.txt --*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] + +[component_0] +type = Library +name = IntelJITEvents +parent = ExecutionEngine diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile new file mode 100644 index 0000000..ba75ac6 --- /dev/null +++ b/lib/ExecutionEngine/IntelJITEvents/Makefile @@ -0,0 +1,17 @@ +##===- lib/ExecutionEngine/JITProfile/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMIntelJITEvents + +include $(LEVEL)/Makefile.config + +SOURCES := IntelJITEventListener.cpp +CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 3dce3b3..af47be9 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -650,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { // Check to see if any of the cases match... BasicBlock *Dest = 0; - unsigned NumCases = I.getNumCases(); - // Skip the first item since that's the default case. - for (unsigned i = 0; i < NumCases; ++i) { - GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF); + for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) { + GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF); if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) { - Dest = cast<BasicBlock>(I.getCaseSuccessor(i)); + Dest = cast<BasicBlock>(i.getCaseSuccessor()); break; } } diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index dcef08c..52bb389 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -2,10 +2,8 @@ add_definitions(-DENABLE_X86_JIT) add_llvm_library(LLVMJIT - Intercept.cpp JIT.cpp JITDwarfEmitter.cpp JITEmitter.cpp JITMemoryManager.cpp - OProfileJITEventListener.cpp ) diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp deleted file mode 100644 index 2251a8e..0000000 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===-- Intercept.cpp - System function interception routines -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// If a function call occurs to an external function, the JIT is designed to use -// the dynamic loader interface to find a function to call. This is useful for -// calling system calls and library functions that are not available in LLVM. -// Some system calls, however, need to be handled specially. For this reason, -// we intercept some of them here and use our own stubs to handle them. 
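//===----------------------------------------------------------------------===//
// Illustrative sketch (not part of this patch): the Interpreter change in
// Execution.cpp above replaces index-based case access with the new
// SwitchInst::CaseIt iterator API; any code that walks switch cases follows
// the same shape. countCasesWithValue is a hypothetical helper.
//===----------------------------------------------------------------------===//
#include "llvm/Instructions.h"
#include "llvm/Support/DataTypes.h"

static unsigned countCasesWithValue(llvm::SwitchInst &SI, uint64_t V) {
  unsigned N = 0;
  // case_begin()/case_end() visit only the explicit cases; the default
  // destination is not included.
  for (llvm::SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
       i != e; ++i)
    if (i.getCaseValue()->equalsInt(V))
      ++N;
  return N;
}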
-// -//===----------------------------------------------------------------------===// - -#include "JIT.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" -using namespace llvm; - -// AtExitHandlers - List of functions to call when the program exits, -// registered with the atexit() library function. -static std::vector<void (*)()> AtExitHandlers; - -/// runAtExitHandlers - Run any functions registered by the program's -/// calls to atexit(3), which we intercept and store in -/// AtExitHandlers. -/// -static void runAtExitHandlers() { - while (!AtExitHandlers.empty()) { - void (*Fn)() = AtExitHandlers.back(); - AtExitHandlers.pop_back(); - Fn(); - } -} - -//===----------------------------------------------------------------------===// -// Function stubs that are invoked instead of certain library calls -//===----------------------------------------------------------------------===// - -// Force the following functions to be linked in to anything that uses the -// JIT. This is a hack designed to work around the all-too-clever Glibc -// strategy of making these functions work differently when inlined vs. when -// not inlined, and hiding their real definitions in a separate archive file -// that the dynamic linker can't see. For more info, search for -// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include <sys/stat.h> -#endif -#include <fcntl.h> -#include <unistd.h> -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' - * available as an exported symbol, so we have to add it explicitly. - */ -namespace { -class StatSymbols { -public: - StatSymbols() { - sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); - sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); - sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); - sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); - sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); - sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); - sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); - sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); - sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); - } -}; -} -static StatSymbols initStatSymbols; -#endif // __linux__ - -// jit_exit - Used to intercept the "exit" library call. -static void jit_exit(int Status) { - runAtExitHandlers(); // Run atexit handlers... - exit(Status); -} - -// jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)()) { - AtExitHandlers.push_back(Fn); // Take note of atexit handler... - return 0; // Always successful -} - -static int jit_noop() { - return 0; -} - -//===----------------------------------------------------------------------===// -// -/// getPointerToNamedFunction - This method returns the address of the specified -/// function by using the dynamic loader interface. As such it is only useful -/// for resolving library symbols, not code generated symbols. 
-/// -void *JIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { - if (!isSymbolSearchingDisabled()) { - // Check to see if this is one of the functions we want to intercept. Note, - // we cast to intptr_t here to silence a -pedantic warning that complains - // about casting a function pointer to a normal pointer. - if (Name == "exit") return (void*)(intptr_t)&jit_exit; - if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - // If this is an asm specifier, skip the sentinal. - if (NameStr[0] == 1) ++NameStr; - - // If it's an external function, look it up in the process image... - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // and has an asm specifier, try again without the underscore. - if (Name[0] == 1 && NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These - // are references to hidden visibility symbols that dlsym cannot resolve. - // If we have one of these, strip off $LDBLStub and try again. -#if defined(__APPLE__) && defined(__ppc__) - if (Name.size() > 9 && Name[Name.size()-9] == '$' && - memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { - // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. - // This mirrors logic in libSystemStubs.a. - std::string Prefix = std::string(Name.begin(), Name.end()-9); - if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) - return Ptr; - if (void *Ptr = getPointerToNamedFunction(Prefix, false)) - return Ptr; - } -#endif - } - - /// If a LazyFunctionCreator is installed, use it to get/create the function. 
- if (LazyFunctionCreator) - if (void *RP = LazyFunctionCreator(Name)) - return RP; - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return 0; -} diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index f715f6f..16b8ee2 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetJITInfo.h" @@ -267,9 +268,9 @@ extern "C" { } JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, bool GVsWithCode) - : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode), - isAlreadyCodeGenerating(false) { + JITMemoryManager *jmm, bool GVsWithCode) + : ExecutionEngine(M), TM(tm), TJI(tji), JMM(jmm), + AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) { setTargetData(TM.getTargetData()); jitstate = new JITState(M); @@ -711,6 +712,27 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { } } +void *JIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled()) { + void *ptr = JMM->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} + + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index 17d33fe..c557981 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -58,6 +58,7 @@ class JIT : public ExecutionEngine { TargetMachine &TM; // The current target we are compiling to TargetJITInfo &TJI; // The JITInfo for the target we are compiling to JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; std::vector<JITEventListener*> EventListeners; /// AllocateGVsWithCode - Some applications require that global variables and diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index efd570d..d404d0c 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -314,6 +314,17 @@ namespace { /// should allocate a separate slab. static const size_t DefaultSizeThreshold; + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); + void AllocateGOT(); // Testing methods. 
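//===----------------------------------------------------------------------===//
// Illustrative sketch (not part of this patch): with external symbol lookup
// now routed through JITMemoryManager::getPointerToNamedFunction() (declared
// above), a client still has two pre-existing hooks for names the process
// image cannot satisfy: pre-registering an address with
// DynamicLibrary::AddSymbol, or installing a last-chance LazyFunctionCreator
// on the ExecutionEngine. The names my_log, resolveMissing and
// installResolutionHooks are hypothetical.
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Support/DynamicLibrary.h"
#include <stdint.h>
#include <string>

extern "C" void my_log(const char *Msg);    // host-side helper (hypothetical)

static void *resolveMissing(const std::string &Name) {
  if (Name == "my_log")                     // last-chance lookup
    return (void*)(intptr_t)&my_log;
  return 0;                                 // 0 lets the JIT report failure
}

static void installResolutionHooks(llvm::ExecutionEngine *EE) {
  // Checked by SearchForAddressOfSymbol before the process image is probed.
  llvm::sys::DynamicLibrary::AddSymbol("my_log", (void*)(intptr_t)&my_log);
  // Consulted only after the memory manager's lookup has failed.
  EE->InstallLazyFunctionCreator(resolveMissing);
}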
@@ -757,6 +768,148 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { return true; } +//===----------------------------------------------------------------------===// +// getPointerToNamedFunction() implementation. +//===----------------------------------------------------------------------===// +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Config/config.h" + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +//===----------------------------------------------------------------------===// + +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include <sys/stat.h> +#endif +#include <fcntl.h> +#include <unistd.h> +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. + */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. 
As such it is only useful +/// for resolving library symbols, not code generated symbols. +/// +void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinal. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. 
+ std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} + + + JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { return new DefaultJITMemoryManager(); } diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt index d426969..1f94a4f 100644 --- a/lib/ExecutionEngine/LLVMBuild.txt +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Interpreter JIT MCJIT RuntimeDyld +subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT [component_0] type = Library diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index 2c0f8d6..fef7176 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp MCJITMemoryManager.cpp - Intercept.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp deleted file mode 100644 index f83f428..0000000 --- a/lib/ExecutionEngine/MCJIT/Intercept.cpp +++ /dev/null @@ -1,162 +0,0 @@ -//===-- Intercept.cpp - System function interception routines -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// If a function call occurs to an external function, the JIT is designed to use -// the dynamic loader interface to find a function to call. This is useful for -// calling system calls and library functions that are not available in LLVM. -// Some system calls, however, need to be handled specially. For this reason, -// we intercept some of them here and use our own stubs to handle them. -// -//===----------------------------------------------------------------------===// - -#include "MCJIT.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" -using namespace llvm; - -// AtExitHandlers - List of functions to call when the program exits, -// registered with the atexit() library function. -static std::vector<void (*)()> AtExitHandlers; - -/// runAtExitHandlers - Run any functions registered by the program's -/// calls to atexit(3), which we intercept and store in -/// AtExitHandlers. -/// -static void runAtExitHandlers() { - while (!AtExitHandlers.empty()) { - void (*Fn)() = AtExitHandlers.back(); - AtExitHandlers.pop_back(); - Fn(); - } -} - -//===----------------------------------------------------------------------===// -// Function stubs that are invoked instead of certain library calls -//===----------------------------------------------------------------------===// - -// Force the following functions to be linked in to anything that uses the -// JIT. This is a hack designed to work around the all-too-clever Glibc -// strategy of making these functions work differently when inlined vs. when -// not inlined, and hiding their real definitions in a separate archive file -// that the dynamic linker can't see. 
For more info, search for -// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include <sys/stat.h> -#endif -#include <fcntl.h> -#include <unistd.h> -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' - * available as an exported symbol, so we have to add it explicitly. - */ -namespace { -class StatSymbols { -public: - StatSymbols() { - sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); - sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); - sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); - sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); - sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); - sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); - sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); - sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); - sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); - sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); - } -}; -} -static StatSymbols initStatSymbols; -#endif // __linux__ - -// jit_exit - Used to intercept the "exit" library call. -static void jit_exit(int Status) { - runAtExitHandlers(); // Run atexit handlers... - exit(Status); -} - -// jit_atexit - Used to intercept the "atexit" library call. -static int jit_atexit(void (*Fn)()) { - AtExitHandlers.push_back(Fn); // Take note of atexit handler... - return 0; // Always successful -} - -static int jit_noop() { - return 0; -} - -//===----------------------------------------------------------------------===// -// -/// getPointerToNamedFunction - This method returns the address of the specified -/// function by using the dynamic loader interface. As such it is only useful -/// for resolving library symbols, not code generated symbols. -/// -void *MCJIT::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { - if (!isSymbolSearchingDisabled()) { - // Check to see if this is one of the functions we want to intercept. Note, - // we cast to intptr_t here to silence a -pedantic warning that complains - // about casting a function pointer to a normal pointer. - if (Name == "exit") return (void*)(intptr_t)&jit_exit; - if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - // If this is an asm specifier, skip the sentinal. - if (NameStr[0] == 1) ++NameStr; - - // If it's an external function, look it up in the process image... - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // and has an asm specifier, try again without the underscore. 
- if (Name[0] == 1 && NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These - // are references to hidden visibility symbols that dlsym cannot resolve. - // If we have one of these, strip off $LDBLStub and try again. -#if defined(__APPLE__) && defined(__ppc__) - if (Name.size() > 9 && Name[Name.size()-9] == '$' && - memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { - // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. - // This mirrors logic in libSystemStubs.a. - std::string Prefix = std::string(Name.begin(), Name.end()-9); - if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) - return Ptr; - if (void *Ptr = getPointerToNamedFunction(Prefix, false)) - return Ptr; - } -#endif - } - - /// If a LazyFunctionCreator is installed, use it to get/create the function. - if (LazyFunctionCreator) - if (void *RP = LazyFunctionCreator(Name)) - return RP; - - if (AbortOnFailure) { - report_fatal_error("Program used external function '"+Name+ - "' which could not be resolved!"); - } - return 0; -} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 5f93a8d..cbb23d3 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -215,3 +215,23 @@ GenericValue MCJIT::runFunction(Function *F, llvm_unreachable("Full-featured argument passing not supported yet!"); } + +void *MCJIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure){ + if (!isSymbolSearchingDisabled()) { + void *ptr = MemMgr->getPointerToNamedFunction(Name, false); + if (ptr) + return ptr; + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. + if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 7f4ae77..2b3df98 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -67,6 +67,7 @@ public: /// virtual void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true); + /// mapSectionAddress - map a section to its target address space value. /// Map the address of a JIT section as returned from the memory manager /// to the address in the target process as the running code will see it. diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index ac8c155..dac8b26 100644 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -33,46 +33,17 @@ public: uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { - return JMM->allocateDataSection(Size, Alignment, SectionID); + return JMM->allocateSpace(Size, Alignment); } uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { - return JMM->allocateCodeSection(Size, Alignment, SectionID); + return JMM->allocateSpace(Size, Alignment); } - // Allocate ActualSize bytes, or more, for the named function. Return - // a pointer to the allocated memory and update Size to reflect how much - // memory was acutally allocated. 
- uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) { - // FIXME: This should really reference the MCAsmInfo to get the global - // prefix. - if (Name[0] == '_') ++Name; - Function *F = M->getFunction(Name); - // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-" or - // "+"), try prepending a \01 and see if we can find it that way. - if (!F && (Name[0] == '-' || Name[0] == '+')) - F = M->getFunction((Twine("\1") + Name).str()); - assert(F && "No matching function in JIT IR Module!"); - return JMM->startFunctionBody(F, Size); - } - - // Mark the end of the function, including how much of the allocated - // memory was actually used. - void endFunctionBody(const char *Name, uint8_t *FunctionStart, - uint8_t *FunctionEnd) { - // FIXME: This should really reference the MCAsmInfo to get the global - // prefix. - if (Name[0] == '_') ++Name; - Function *F = M->getFunction(Name); - // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-" or - // "+"), try prepending a \01 and see if we can find it that way. - if (!F && (Name[0] == '-' || Name[0] == '+')) - F = M->getFunction((Twine("\1") + Name).str()); - assert(F && "No matching function in JIT IR Module!"); - JMM->endFunctionBody(F, FunctionStart, FunctionEnd); + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) { + return JMM->getPointerToNamedFunction(Name, AbortOnFailure); } }; diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index 9a649a5..c26e0ad 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -8,6 +8,17 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. LIBRARYNAME = LLVMExecutionEngine + +include $(LEVEL)/Makefile.config + PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld -include $(LEVEL)/Makefile.common +ifeq ($(USE_INTEL_JITEVENTS), 1) +PARALLEL_DIRS += IntelJITEvents +endif + +ifeq ($(USE_OPROFILE), 1) +PARALLEL_DIRS += OProfileJIT +endif + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt new file mode 100644 index 0000000..d585136 --- /dev/null +++ b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt @@ -0,0 +1,7 @@ + +include_directories( ${LLVM_OPROFILE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMOProfileJIT + OProfileJITEventListener.cpp + OProfileWrapper.cpp + ) diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt new file mode 100644 index 0000000..4516dfa --- /dev/null +++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] + +[component_0] +type = Library +name = OProfileJIT +parent = ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/Makefile b/lib/ExecutionEngine/OProfileJIT/Makefile new file mode 100644 index 0000000..fd3adce --- /dev/null +++ b/lib/ExecutionEngine/OProfileJIT/Makefile @@ -0,0 +1,18 @@ +##===- lib/ExecutionEngine/OProfileJIT/Makefile ------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMOProfileJIT + +include $(LEVEL)/Makefile.config + +SOURCES += OProfileJITEventListener.cpp \ + OProfileWrapper.cpp +CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 9a9ed6d..e6142e3 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -7,51 +7,55 @@ // //===----------------------------------------------------------------------===// // -// This file defines a JITEventListener object that calls into OProfile to tell -// it about JITted functions. For now, we only record function names and sizes, -// but eventually we'll also record line number information. -// -// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the -// definition of the interface we're using. +// This file defines a JITEventListener object that uses OProfileWrapper to tell +// oprofile about JITted functions, including source line information. 
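//===----------------------------------------------------------------------===//
// Illustrative sketch (not part of this patch): exactly as with the Intel
// listener, a JIT client opts in by attaching the OProfile listener to its
// ExecutionEngine; createOProfileJITEventListener() is the factory this patch
// moves onto JITEventListener. Because OProfileWrapper (later in this patch)
// only loads libopagent.so when the oprofile daemon is detected, attaching
// the listener on a machine without oprofile effectively degrades to a no-op.
// attachOProfile is a hypothetical helper name.
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"

static void attachOProfile(llvm::ExecutionEngine *EE) {
  EE->RegisterJITEventListener(
      llvm::JITEventListener::createOProfileJITEventListener());
}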
// //===----------------------------------------------------------------------===// +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/JITEventListener.h" + #define DEBUG_TYPE "oprofile-jit-event-listener" #include "llvm/Function.h" -#include "llvm/Metadata.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" -#include "llvm/Config/config.h" -#include <stddef.h> -using namespace llvm; +#include "EventListenerCommon.h" -#if USE_OPROFILE +#include <dirent.h> +#include <fcntl.h> -#include <opagent.h> +using namespace llvm; +using namespace llvm::jitprofiling; namespace { class OProfileJITEventListener : public JITEventListener { - op_agent_t Agent; + OProfileWrapper& Wrapper; + + void initialize(); + public: - OProfileJITEventListener(); + OProfileJITEventListener(OProfileWrapper& LibraryWrapper) + : Wrapper(LibraryWrapper) { + initialize(); + } + ~OProfileJITEventListener(); virtual void NotifyFunctionEmitted(const Function &F, - void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details); + void *FnStart, size_t FnSize, + const JITEvent_EmittedFunctionDetails &Details); + virtual void NotifyFreeingMachineCode(void *OldPtr); }; -OProfileJITEventListener::OProfileJITEventListener() - : Agent(op_open_agent()) { - if (Agent == NULL) { +void OProfileJITEventListener::initialize() { + if (!Wrapper.op_open_agent()) { const std::string err_str = sys::StrError(); DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n"); } else { @@ -60,8 +64,8 @@ OProfileJITEventListener::OProfileJITEventListener() } OProfileJITEventListener::~OProfileJITEventListener() { - if (Agent != NULL) { - if (op_close_agent(Agent) == -1) { + if (Wrapper.isAgentAvailable()) { + if (Wrapper.op_close_agent() == -1) { const std::string err_str = sys::StrError(); DEBUG(dbgs() << "Failed to disconnect from OProfile agent: " << err_str << "\n"); @@ -71,22 +75,6 @@ OProfileJITEventListener::~OProfileJITEventListener() { } } -class FilenameCache { - // Holds the filename of each Scope, so that we can pass a null-terminated - // string into oprofile. Use an AssertingVH rather than a ValueMap because we - // shouldn't be modifying any MDNodes while this map is alive. - DenseMap<AssertingVH<MDNode>, std::string> Filenames; - - public: - const char *getFilename(MDNode *Scope) { - std::string &Filename = Filenames[Scope]; - if (Filename.empty()) { - Filename = DIScope(Scope).getFilename(); - } - return Filename.c_str(); - } -}; - static debug_line_info LineStartToOProfileFormat( const MachineFunction &MF, FilenameCache &Filenames, uintptr_t Address, DebugLoc Loc) { @@ -103,9 +91,9 @@ static debug_line_info LineStartToOProfileFormat( // Adds the just-emitted function to the symbol table. 
void OProfileJITEventListener::NotifyFunctionEmitted( const Function &F, void *FnStart, size_t FnSize, - const EmittedFunctionDetails &Details) { + const JITEvent_EmittedFunctionDetails &Details) { assert(F.hasName() && FnStart != 0 && "Bad symbol to add"); - if (op_write_native_code(Agent, F.getName().data(), + if (Wrapper.op_write_native_code(F.getName().data(), reinterpret_cast<uint64_t>(FnStart), FnStart, FnSize) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about native function " @@ -151,8 +139,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted( // line info's address to include the start of the function. LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart); - if (op_write_debug_line_info(Agent, FnStart, - LineInfo.size(), &*LineInfo.begin()) == -1) { + if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(), + &*LineInfo.begin()) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " << F.getName() << " at [" @@ -164,7 +152,7 @@ void OProfileJITEventListener::NotifyFunctionEmitted( // Removes the being-deleted function from the symbol table. void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { assert(FnStart && "Invalid function pointer"); - if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) { + if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) { DEBUG(dbgs() << "Failed to tell OProfile about unload of native function at " << FnStart << "\n"); @@ -174,19 +162,16 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { } // anonymous namespace. namespace llvm { -JITEventListener *createOProfileJITEventListener() { - return new OProfileJITEventListener; -} +JITEventListener *JITEventListener::createOProfileJITEventListener() { + static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper); + return new OProfileJITEventListener(*JITProfilingWrapper); } -#else // USE_OPROFILE - -namespace llvm { -// By defining this to return NULL, we can let clients call it unconditionally, -// even if they haven't configured with the OProfile libraries. -JITEventListener *createOProfileJITEventListener() { - return NULL; +// for testing +JITEventListener *JITEventListener::createOProfileJITEventListener( + OProfileWrapper* TestImpl) { + return new OProfileJITEventListener(*TestImpl); } -} // namespace llvm -#endif // USE_OPROFILE +} // namespace llvm + diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp new file mode 100644 index 0000000..d67f537 --- /dev/null +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -0,0 +1,263 @@ +//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the interface in OProfileWrapper.h. It is responsible +// for loading the opagent dynamic library when the first call to an op_ +// function occurs. 
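//===----------------------------------------------------------------------===//
// Illustrative sketch (not part of this patch): the wrapper implemented below
// relies on ordinary lazy binding: it keeps function pointers for the opagent
// entry points and fills them in from the dynamically loaded library the
// first time an op_ call is made. A stripped-down version of that idea, using
// hypothetical names (libexample.so, example_entry), looks like this:
//===----------------------------------------------------------------------===//
#include "llvm/Support/DynamicLibrary.h"
#include <stdint.h>
#include <string>

typedef int (*example_fn_t)(void);
static example_fn_t ExampleFn = 0;
static bool ExampleTried = false;

static bool lazyBindExample() {
  if (ExampleTried)
    return ExampleFn != 0;
  ExampleTried = true;
  std::string Err;
  // LoadLibraryPermanently returns true on error; on success the library's
  // symbols become visible to SearchForAddressOfSymbol.
  if (llvm::sys::DynamicLibrary::LoadLibraryPermanently("libexample.so", &Err))
    return false;
  ExampleFn = (example_fn_t)(intptr_t)
      llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("example_entry");
  return ExampleFn != 0;
}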
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/OProfileWrapper.h" + +#define DEBUG_TYPE "oprofile-wrapper" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/ADT/SmallString.h" + +#include <sstream> +#include <cstring> +#include <stddef.h> +#include <dirent.h> +#include <sys/stat.h> +#include <fcntl.h> + +namespace { + +// Global mutex to ensure a single thread initializes oprofile agent. +llvm::sys::Mutex OProfileInitializationMutex; + +} // anonymous namespace + +namespace llvm { + +OProfileWrapper::OProfileWrapper() +: Agent(0), + OpenAgentFunc(0), + CloseAgentFunc(0), + WriteNativeCodeFunc(0), + WriteDebugLineInfoFunc(0), + UnloadNativeCodeFunc(0), + MajorVersionFunc(0), + MinorVersionFunc(0), + IsOProfileRunningFunc(0), + Initialized(false) { +} + +bool OProfileWrapper::initialize() { + using namespace llvm; + using namespace llvm::sys; + + MutexGuard Guard(OProfileInitializationMutex); + + if (Initialized) + return OpenAgentFunc != 0; + + Initialized = true; + + // If the oprofile daemon is not running, don't load the opagent library + if (!isOProfileRunning()) { + DEBUG(dbgs() << "OProfile daemon is not detected.\n"); + return false; + } + + std::string error; + if(!DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) { + DEBUG(dbgs() + << "OProfile connector library libopagent.so could not be loaded: " + << error << "\n"); + } + + // Get the addresses of the opagent functions + OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_open_agent"); + CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_close_agent"); + WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code"); + WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info"); + UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code"); + MajorVersionFunc = (op_major_version_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_major_version"); + MinorVersionFunc = (op_major_version_ptr_t)(intptr_t) + DynamicLibrary::SearchForAddressOfSymbol("op_minor_version"); + + // With missing functions, we can do nothing + if (!OpenAgentFunc + || !CloseAgentFunc + || !WriteNativeCodeFunc + || !WriteDebugLineInfoFunc + || !UnloadNativeCodeFunc) { + OpenAgentFunc = 0; + CloseAgentFunc = 0; + WriteNativeCodeFunc = 0; + WriteDebugLineInfoFunc = 0; + UnloadNativeCodeFunc = 0; + return false; + } + + return true; +} + +bool OProfileWrapper::isOProfileRunning() { + if (IsOProfileRunningFunc != 0) + return IsOProfileRunningFunc(); + return checkForOProfileProcEntry(); +} + +bool OProfileWrapper::checkForOProfileProcEntry() { + DIR* ProcDir; + + ProcDir = opendir("/proc"); + if (!ProcDir) + return false; + + // Walk the /proc tree looking for the oprofile daemon + struct dirent* Entry; + while (0 != (Entry = readdir(ProcDir))) { + if (Entry->d_type == DT_DIR) { + // Build a path from the current entry name + SmallString<256> CmdLineFName; + raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name + << "/cmdline"; + + // Open the cmdline file + int CmdLineFD = open(CmdLineFName.c_str(), S_IRUSR); 
+ if (CmdLineFD != -1) { + char ExeName[PATH_MAX+1]; + char* BaseName = 0; + + // Read the cmdline file + ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1); + close(CmdLineFD); + ssize_t Idx = 0; + + // Find the terminator for the first string + while (Idx < NumRead-1 && ExeName[Idx] != 0) { + Idx++; + } + + // Go back to the last non-null character + Idx--; + + // Find the last path separator in the first string + while (Idx > 0) { + if (ExeName[Idx] == '/') { + BaseName = ExeName + Idx + 1; + break; + } + Idx--; + } + + // Test this to see if it is the oprofile daemon + if (BaseName != 0 && !strcmp("oprofiled", BaseName)) { + // If it is, we're done + closedir(ProcDir); + return true; + } + } + } + } + + // We've looked through all the files and didn't find the daemon + closedir(ProcDir); + return false; +} + +bool OProfileWrapper::op_open_agent() { + if (!Initialized) + initialize(); + + if (OpenAgentFunc != 0) { + Agent = OpenAgentFunc(); + return Agent != 0; + } + + return false; +} + +int OProfileWrapper::op_close_agent() { + if (!Initialized) + initialize(); + + int ret = -1; + if (Agent && CloseAgentFunc) { + ret = CloseAgentFunc(Agent); + if (ret == 0) { + Agent = 0; + } + } + return ret; +} + +bool OProfileWrapper::isAgentAvailable() { + return Agent != 0; +} + +int OProfileWrapper::op_write_native_code(const char* Name, + uint64_t Addr, + void const* Code, + const unsigned int Size) { + if (!Initialized) + initialize(); + + if (Agent && WriteNativeCodeFunc) + return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size); + + return -1; +} + +int OProfileWrapper::op_write_debug_line_info( + void const* Code, + size_t NumEntries, + struct debug_line_info const* Info) { + if (!Initialized) + initialize(); + + if (Agent && WriteDebugLineInfoFunc) + return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info); + + return -1; +} + +int OProfileWrapper::op_major_version() { + if (!Initialized) + initialize(); + + if (Agent && MajorVersionFunc) + return MajorVersionFunc(); + + return -1; +} + +int OProfileWrapper::op_minor_version() { + if (!Initialized) + initialize(); + + if (Agent && MinorVersionFunc) + return MinorVersionFunc(); + + return -1; +} + +int OProfileWrapper::op_unload_native_code(uint64_t Addr) { + if (!Initialized) + initialize(); + + if (Agent && UnloadNativeCodeFunc) + return UnloadNativeCodeFunc(Agent, Addr); + + return -1; +} + +} // namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 2896c2d..ff4a2c8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -26,45 +26,290 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { -void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress, - uint8_t *EndAddress) { - // FIXME: DEPRECATED in favor of by-section allocation. - // Allocate memory for the function via the memory manager. - uintptr_t Size = EndAddress - StartAddress + 1; - uintptr_t AllocSize = Size; - uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize); - assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) && - "Memory manager failed to allocate enough memory!"); - // Copy the function payload into the memory block. - memcpy(Mem, StartAddress, Size); - MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size); - // Remember where we put it. 
- unsigned SectionID = Sections.size(); - Sections.push_back(sys::MemoryBlock(Mem, Size)); - // Default the assigned address for this symbol to wherever this - // allocated it. - SymbolTable[Name] = SymbolLoc(SectionID, 0); - DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n"); -} // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { + // First, resolve relocations assotiated with external symbols. + resolveSymbols(); + // Just iterate over the sections we have and resolve all the relocations // in them. Gross overkill, but it gets the job done. for (int i = 0, e = Sections.size(); i != e; ++i) { - reassignSectionAddress(i, SectionLoadAddress[i]); + reassignSectionAddress(i, Sections[i].LoadAddress); } } void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) { - assert(SectionLocalMemToID.count(LocalAddress) && - "Attempting to remap address of unknown section!"); - unsigned SectionID = SectionLocalMemToID[LocalAddress]; - reassignSectionAddress(SectionID, TargetAddress); + for (unsigned i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Address == LocalAddress) { + reassignSectionAddress(i, TargetAddress); + return; + } + } + llvm_unreachable("Attempting to remap address of unknown section!"); +} + +bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { + // FIXME: ObjectFile don't modify MemoryBuffer. + // It should use const MemoryBuffer as parameter. + ObjectFile *obj = ObjectFile:: + createObjectFile(const_cast<MemoryBuffer*>(InputBuffer)); + + Arch = (Triple::ArchType)obj->getArch(); + + LocalSymbolMap LocalSymbols; // Functions and data symbols from the + // object file. + ObjSectionToIDMap LocalSections; // Used sections from the object file + + error_code err; + + + // Parse symbols + DEBUG(dbgs() << "Parse symbols:\n"); + for (symbol_iterator it = obj->begin_symbols(), itEnd = obj->end_symbols(); + it != itEnd; it.increment(err)) { + if (err) break; + object::SymbolRef::Type SymType; + StringRef Name; + if ((bool)(err = it->getType(SymType))) break; + if ((bool)(err = it->getName(Name))) break; + + if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data) { + uint64_t FileOffset; + uint32_t flags; + StringRef sData; + section_iterator sIt = obj->end_sections(); + if ((bool)(err = it->getFileOffset(FileOffset))) break; + if ((bool)(err = it->getFlags(flags))) break; + if ((bool)(err = it->getSection(sIt))) break; + if (sIt == obj->end_sections()) continue; + if ((bool)(err = sIt->getContents(sData))) break; + const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + + (uintptr_t)FileOffset; + uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin()); + unsigned SectionID = + findOrEmitSection(*sIt, + SymType == object::SymbolRef::ST_Function, + LocalSections); + bool isGlobal = flags & SymbolRef::SF_Global; + LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); + DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) + << " flags: " << flags + << " SID: " << SectionID + << " Offset: " << format("%p", SectOffset)); + if (isGlobal) + SymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + } + DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n"); + } + if (err) { + report_fatal_error(err.message()); + } + + // Parse and proccess relocations + DEBUG(dbgs() << "Parse relocations:\n"); + for (section_iterator sIt = obj->begin_sections(), + sItEnd = 
obj->end_sections(); sIt != sItEnd; sIt.increment(err)) { + if (err) break; + bool isFirstRelocation = true; + unsigned SectionID = 0; + StubMap Stubs; + + for (relocation_iterator it = sIt->begin_relocations(), + itEnd = sIt->end_relocations(); it != itEnd; it.increment(err)) { + if (err) break; + + // If it's first relocation in this section, find its SectionID + if (isFirstRelocation) { + SectionID = findOrEmitSection(*sIt, true, LocalSections); + DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); + isFirstRelocation = false; + } + + ObjRelocationInfo RI; + RI.SectionID = SectionID; + if ((bool)(err = it->getAdditionalInfo(RI.AdditionalInfo))) break; + if ((bool)(err = it->getOffset(RI.Offset))) break; + if ((bool)(err = it->getSymbol(RI.Symbol))) break; + if ((bool)(err = it->getType(RI.Type))) break; + + DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo + << " Offset: " << format("%p", (uintptr_t)RI.Offset) + << " Type: " << (uint32_t)(RI.Type & 0xffffffffL) + << "\n"); + processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs); + } + if (err) { + report_fatal_error(err.message()); + } + } + return false; +} + +unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section, + bool IsCode) { + + unsigned StubBufSize = 0, + StubSize = getMaxStubSize(); + error_code err; + if (StubSize > 0) { + for (relocation_iterator it = Section.begin_relocations(), + itEnd = Section.end_relocations(); it != itEnd; it.increment(err)) + StubBufSize += StubSize; + } + StringRef data; + uint64_t Alignment64; + if ((bool)(err = Section.getContents(data))) report_fatal_error(err.message()); + if ((bool)(err = Section.getAlignment(Alignment64))) + report_fatal_error(err.message()); + + unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; + unsigned DataSize = data.size(); + unsigned Allocate = DataSize + StubBufSize; + unsigned SectionID = Sections.size(); + const char *pData = data.data(); + uint8_t *Addr = IsCode + ? 
MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) + : MemMgr->allocateDataSection(Allocate, Alignment, SectionID); + + memcpy(Addr, pData, DataSize); + DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " obj addr: " << format("%p", pData) + << " new addr: " << format("%p", Addr) + << " DataSize: " << DataSize + << " StubBufSize: " << StubBufSize + << " Allocate: " << Allocate + << "\n"); + Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData)); + return SectionID; +} + +unsigned RuntimeDyldImpl:: +findOrEmitSection(const SectionRef &Section, bool IsCode, + ObjSectionToIDMap &LocalSections) { + + unsigned SectionID = 0; + ObjSectionToIDMap::iterator sIDIt = LocalSections.find(Section); + if (sIDIt != LocalSections.end()) + SectionID = sIDIt->second; + else { + SectionID = emitSection(Section, IsCode); + LocalSections[Section] = SectionID; + } + return SectionID; +} + +void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value, + unsigned SectionID, uintptr_t Offset, + uint32_t RelType) { + DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName) + << " SID: " << Value.SectionID + << " Addend: " << format("%p", Value.Addend) + << " Offset: " << format("%p", Offset) + << " RelType: " << format("%x", RelType) + << "\n"); + + if (Value.SymbolName == 0) { + Relocations[Value.SectionID].push_back(RelocationEntry( + SectionID, + Offset, + RelType, + Value.Addend)); + } else + SymbolRelocations[Value.SymbolName].push_back(RelocationEntry( + SectionID, + Offset, + RelType, + Value.Addend)); +} + +uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { + // TODO: There is only ARM far stub now. We should add the Thumb stub, + // and stubs for branches Thumb - ARM and ARM - Thumb. + if (Arch == Triple::arm) { + uint32_t *StubAddr = (uint32_t*)Addr; + *StubAddr = 0xe51ff004; // ldr pc,<label> + return (uint8_t*)++StubAddr; + } + else + return Addr; } +// Assign an address to a symbol name and resolve all the relocations +// associated with it. +void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, + uint64_t Addr) { + // The address to use for relocation resolution is not + // the address of the local section buffer. We must be doing + // a remote execution environment of some sort. Re-apply any + // relocations referencing this section with the given address. + // + // Addr is a uint64_t because we can't assume the pointer width + // of the target is the same as that of the host. Just use a generic + // "big enough" type. + Sections[SectionID].LoadAddress = Addr; + DEBUG(dbgs() << "Resolving relocations Section #" << SectionID + << "\t" << format("%p", (uint8_t *)Addr) + << "\n"); + resolveRelocationList(Relocations[SectionID], Addr); +} + +void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, + uint64_t Value) { + uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; + DEBUG(dbgs() << "\tSectionID: " << RE.SectionID + << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " Data: " << RE.Data + << " Addend: " << RE.Addend + << "\n"); + + resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, + Value, RE.Data, RE.Addend); +} + +void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, + uint64_t Value) { + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + resolveRelocationEntry(Relocs[i], Value); + } +} + +// resolveSymbols - Resolve any relocations to the specified symbols if +// we know where it lives. 
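
For reference, the 8-byte ARM stub that createStubFunction() emits above is an `ldr pc, [pc, #-4]` instruction followed by a literal word; because `pc` reads as the instruction address plus 8, the load pulls in the word that immediately follows, so patching that literal retargets the branch. A minimal standalone sketch, assuming a little-endian ARM target (the function and parameter names are illustrative, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Lay out the 8-byte far-branch stub: one ARM instruction plus the
    // 32-bit destination it loads into pc.
    static uint8_t *writeFarStub(uint8_t *Stub, uint32_t Destination) {
      const uint32_t LdrPcLiteral = 0xe51ff004;   // ldr pc, [pc, #-4]
      std::memcpy(Stub, &LdrPcLiteral, 4);        // the branch instruction
      std::memcpy(Stub + 4, &Destination, 4);     // the word it branches through
      return Stub + 4;  // like createStubFunction(), return the literal's slot
    }
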
+void RuntimeDyldImpl::resolveSymbols() { + StringMap<RelocationList>::iterator it = SymbolRelocations.begin(), + itEnd = SymbolRelocations.end(); + for (; it != itEnd; it++) { + StringRef Name = it->first(); + RelocationList &Relocs = it->second; + StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name); + if (Loc == SymbolTable.end()) { + // This is an external symbol, try to get it address from + // MemoryManager. + uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), + true); + DEBUG(dbgs() << "Resolving relocations Name: " << Name + << "\t" << format("%p", Addr) + << "\n"); + resolveRelocationList(Relocs, (uintptr_t)Addr); + } else { + // Change the relocation to be section relative rather than symbol + // relative and move it to the resolved relocation list. + DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n"); + for (int i = 0, e = Relocs.size(); i != e; ++i) { + RelocationEntry Entry = Relocs[i]; + Entry.Addend += Loc->second.second; + Relocations[Loc->second.first].push_back(Entry); + } + Relocs.clear(); + } + } +} + + //===----------------------------------------------------------------------===// // RuntimeDyld class implementation RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index e15b200..9351b6c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -25,222 +25,58 @@ using namespace llvm::object; namespace llvm { -namespace { -// FIXME: this function should probably not live here... -// -// Returns the name and address of an unrelocated symbol in an ELF section -void getSymbolInfo(symbol_iterator Sym, uint64_t &Addr, StringRef &Name) { - //FIXME: error checking here required to catch corrupt ELF objects... - error_code Err = Sym->getName(Name); - - uint64_t AddrInSection; - Err = Sym->getAddress(AddrInSection); - - SectionRef empty_section; - section_iterator Section(empty_section); - Err = Sym->getSection(Section); - - StringRef SectionContents; - Section->getContents(SectionContents); - - Addr = reinterpret_cast<uint64_t>(SectionContents.data()) + AddrInSection; -} - -} - -bool RuntimeDyldELF::loadObject(MemoryBuffer *InputBuffer) { - if (!isCompatibleFormat(InputBuffer)) - return true; - - OwningPtr<ObjectFile> Obj(ObjectFile::createELFObjectFile(InputBuffer)); - - Arch = Obj->getArch(); - - // Map address in the Object file image to function names - IntervalMap<uint64_t, StringRef>::Allocator A; - IntervalMap<uint64_t, StringRef> FuncMap(A); - - // This is a bit of a hack. The ObjectFile we've just loaded reports - // section addresses as 0 and doesn't provide access to the section - // offset (from which we could calculate the address. Instead, - // we're storing the address when it comes up in the ST_Debug case - // below. 
- // - StringMap<uint64_t> DebugSymbolMap; - - symbol_iterator SymEnd = Obj->end_symbols(); - error_code Err; - for (symbol_iterator Sym = Obj->begin_symbols(); - Sym != SymEnd; Sym.increment(Err)) { - SymbolRef::Type Type; - Sym->getType(Type); - if (Type == SymbolRef::ST_Function) { - StringRef Name; - uint64_t Addr; - getSymbolInfo(Sym, Addr, Name); - - uint64_t Size; - Err = Sym->getSize(Size); - - uint8_t *Start; - uint8_t *End; - Start = reinterpret_cast<uint8_t*>(Addr); - End = reinterpret_cast<uint8_t*>(Addr + Size - 1); - - extractFunction(Name, Start, End); - FuncMap.insert(Addr, Addr + Size - 1, Name); - } else if (Type == SymbolRef::ST_Debug) { - // This case helps us find section addresses - StringRef Name; - uint64_t Addr; - getSymbolInfo(Sym, Addr, Name); - DebugSymbolMap[Name] = Addr; - } - } - - // Iterate through the relocations for this object - section_iterator SecEnd = Obj->end_sections(); - for (section_iterator Sec = Obj->begin_sections(); - Sec != SecEnd; Sec.increment(Err)) { - StringRef SecName; - uint64_t SecAddr; - Sec->getName(SecName); - // Ignore sections that aren't in our map - if (DebugSymbolMap.find(SecName) == DebugSymbolMap.end()) { - continue; - } - SecAddr = DebugSymbolMap[SecName]; - relocation_iterator RelEnd = Sec->end_relocations(); - for (relocation_iterator Rel = Sec->begin_relocations(); - Rel != RelEnd; Rel.increment(Err)) { - uint64_t RelOffset; - uint64_t RelType; - int64_t RelAddend; - SymbolRef RelSym; - StringRef SymName; - uint64_t SymAddr; - uint64_t SymOffset; - - Rel->getAddress(RelOffset); - Rel->getType(RelType); - Rel->getAdditionalInfo(RelAddend); - Rel->getSymbol(RelSym); - RelSym.getName(SymName); - RelSym.getAddress(SymAddr); - RelSym.getFileOffset(SymOffset); - - // If this relocation is inside a function, we want to store the - // function name and a function-relative offset - IntervalMap<uint64_t, StringRef>::iterator ContainingFunc - = FuncMap.find(SecAddr + RelOffset); - if (ContainingFunc.valid()) { - // Re-base the relocation to make it relative to the target function - RelOffset = (SecAddr + RelOffset) - ContainingFunc.start(); - Relocations[SymName].push_back(RelocationEntry(ContainingFunc.value(), - RelOffset, - RelType, - RelAddend, - true)); - } else { - Relocations[SymName].push_back(RelocationEntry(SecName, - RelOffset, - RelType, - RelAddend, - false)); - } - } - } - return false; -} - -void RuntimeDyldELF::resolveRelocations() { - // FIXME: deprecated. should be changed to use the by-section - // allocation and relocation scheme. - - // Just iterate over the symbols in our symbol table and assign their - // addresses. 
- StringMap<SymbolLoc>::iterator i = SymbolTable.begin(); - StringMap<SymbolLoc>::iterator e = SymbolTable.end(); - for (;i != e; ++i) { - assert (i->getValue().second == 0 && "non-zero offset in by-function sym!"); - reassignSymbolAddress(i->getKey(), - (uint8_t*)Sections[i->getValue().first].base()); - } -} - -void RuntimeDyldELF::resolveX86_64Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE) { - uint8_t *TargetAddr; - if (RE.IsFunctionRelative) { - StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target); - assert(Loc != SymbolTable.end() && "Function for relocation not found"); - TargetAddr = - reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) + - Loc->second.second + RE.Offset; - } else { - // FIXME: Get the address of the target section and add that to RE.Offset - llvm_unreachable("Non-function relocation not implemented yet!"); - } - - switch (RE.Type) { - default: llvm_unreachable("Relocation type not implemented yet!"); +void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; case ELF::R_X86_64_64: { - uint8_t **Target = reinterpret_cast<uint8_t**>(TargetAddr); - *Target = Addr + RE.Addend; + uint64_t *Target = (uint64_t*)(LocalAddress); + *Target = Value + Addend; break; } case ELF::R_X86_64_32: case ELF::R_X86_64_32S: { - uint64_t Value = reinterpret_cast<uint64_t>(Addr) + RE.Addend; + Value += Addend; // FIXME: Handle the possibility of this assertion failing - assert((RE.Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) || - (RE.Type == ELF::R_X86_64_32S && + assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) || + (Type == ELF::R_X86_64_32S && (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL)); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); - uint32_t *Target = reinterpret_cast<uint32_t*>(TargetAddr); + uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress); *Target = TruncatedAddr; break; } case ELF::R_X86_64_PC32: { - uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr); - uint64_t RealOffset = *Placeholder + - reinterpret_cast<uint64_t>(Addr) + - RE.Addend - reinterpret_cast<uint64_t>(TargetAddr); - assert((RealOffset & 0xFFFFFFFF) == RealOffset); - uint32_t TruncOffset = (RealOffset & 0xFFFFFFFF); + uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); + int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress; + assert(RealOffset <= 214783647 && RealOffset >= -214783648); + int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); *Placeholder = TruncOffset; break; } } } -void RuntimeDyldELF::resolveX86Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE) { - uint8_t *TargetAddr; - if (RE.IsFunctionRelative) { - StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(RE.Target); - assert(Loc != SymbolTable.end() && "Function for relocation not found"); - TargetAddr = - reinterpret_cast<uint8_t*>(Sections[Loc->second.first].base()) + - Loc->second.second + RE.Offset; - } else { - // FIXME: Get the address of the target section and add that to RE.Offset - llvm_unreachable("Non-function relocation not implemented yet!"); - } - - switch (RE.Type) { +void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend) { + switch (Type) { case ELF::R_386_32: { - uint8_t **Target 
= reinterpret_cast<uint8_t**>(TargetAddr); - *Target = Addr + RE.Addend; + uint32_t *Target = (uint32_t*)(LocalAddress); + *Target = Value + Addend; break; } case ELF::R_386_PC32: { - uint32_t *Placeholder = reinterpret_cast<uint32_t*>(TargetAddr); - uint32_t RealOffset = *Placeholder + reinterpret_cast<uintptr_t>(Addr) + - RE.Addend - reinterpret_cast<uintptr_t>(TargetAddr); + uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); + uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress; *Placeholder = RealOffset; break; } @@ -248,57 +84,173 @@ void RuntimeDyldELF::resolveX86Relocation(StringRef Name, // There are other relocation types, but it appears these are the // only ones currently used by the LLVM ELF object writer llvm_unreachable("Relocation type not implemented yet!"); + break; } } -void RuntimeDyldELF::resolveArmRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE) { +void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend) { + // TODO: Add Thumb relocations. + uint32_t* TargetPtr = (uint32_t*)LocalAddress; + Value += Addend; + + DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress + << " FinalAddress: " << format("%p",FinalAddress) + << " Value: " << format("%x",Value) + << " Type: " << format("%x",Type) + << " Addend: " << format("%x",Addend) + << "\n"); + + switch(Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + + // Just write 32bit value to relocation address + case ELF::R_ARM_ABS32 : + *TargetPtr = Value; + break; + + // Write first 16 bit of 32 bit value to the mov instruction. + // Last 4 bit should be shifted. + case ELF::R_ARM_MOVW_ABS_NC : + Value = Value & 0xFFFF; + *TargetPtr |= Value & 0xFFF; + *TargetPtr |= ((Value >> 12) & 0xF) << 16; + break; + + // Write last 16 bit of 32 bit value to the mov instruction. + // Last 4 bit should be shifted. + case ELF::R_ARM_MOVT_ABS : + Value = (Value >> 16) & 0xFFFF; + *TargetPtr |= Value & 0xFFF; + *TargetPtr |= ((Value >> 12) & 0xF) << 16; + break; + + // Write 24 bit relative value to the branch instruction. + case ELF::R_ARM_PC24 : // Fall through. + case ELF::R_ARM_CALL : // Fall through. + case ELF::R_ARM_JUMP24 : + int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8); + RelValue = (RelValue & 0x03FFFFFC) >> 2; + *TargetPtr &= 0xFF000000; + *TargetPtr |= RelValue; + break; + } } -void RuntimeDyldELF::resolveRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE) { +void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend) { switch (Arch) { case Triple::x86_64: - resolveX86_64Relocation(Name, Addr, RE); + resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend); break; case Triple::x86: - resolveX86Relocation(Name, Addr, RE); + resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); break; - case Triple::arm: - resolveArmRelocation(Name, Addr, RE); + case Triple::arm: // Fall through. + case Triple::thumb: + resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); break; default: llvm_unreachable("Unsupported CPU type!"); } } -void RuntimeDyldELF::reassignSymbolAddress(StringRef Name, uint8_t *Addr) { - // FIXME: deprecated. 
switch to reassignSectionAddress() instead. - // - // Actually moving the symbol address requires by-section mapping. - assert(Sections[SymbolTable.lookup(Name).first].base() == (void*)Addr && - "Unable to relocate section in by-function JIT allocation model!"); - - RelocationList &Relocs = Relocations[Name]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - RelocationEntry &RE = Relocs[i]; - resolveRelocation(Name, Addr, RE); +void RuntimeDyldELF:: +processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, StubMap &Stubs) { + + uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL); + intptr_t Addend = (intptr_t)Rel.AdditionalInfo; + RelocationValueRef Value; + StringRef TargetName; + const SymbolRef &Symbol = Rel.Symbol; + Symbol.getName(TargetName); + DEBUG(dbgs() << "\t\tRelType: " << RelType + << " Addend: " << Addend + << " TargetName: " << TargetName + << "\n"); + // First look the symbol in object file symbols. + LocalSymbolMap::iterator it = Symbols.find(TargetName.data()); + if (it != Symbols.end()) { + Value.SectionID = it->second.first; + Value.Addend = it->second.second; + } else { + // Second look the symbol in global symbol table. + StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data()); + if (itS != SymbolTable.end()) { + Value.SectionID = itS->second.first; + Value.Addend = itS->second.second; + } else { + SymbolRef::Type SymType; + Symbol.getType(SymType); + switch (SymType) { + case SymbolRef::ST_Debug: { + // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously + // and can be changed by another developers. Maybe best way is add + // a new symbol type ST_Section to SymbolRef and use it. + section_iterator sIt = Obj.end_sections(); + Symbol.getSection(sIt); + if (sIt == Obj.end_sections()) + llvm_unreachable("Symbol section not found, bad object file format!"); + DEBUG(dbgs() << "\t\tThis is section symbol\n"); + Value.SectionID = findOrEmitSection((*sIt), true, ObjSectionToID); + Value.Addend = Addend; + break; + } + case SymbolRef::ST_Unknown: { + Value.SymbolName = TargetName.data(); + Value.Addend = Addend; + break; + } + default: + llvm_unreachable("Unresolved symbol type!"); + break; + } + } } -} - -// Assign an address to a symbol name and resolve all the relocations -// associated with it. -void RuntimeDyldELF::reassignSectionAddress(unsigned SectionID, uint64_t Addr) { - // The address to use for relocation resolution is not - // the address of the local section buffer. We must be doing - // a remote execution environment of some sort. Re-apply any - // relocations referencing this section with the given address. - // - // Addr is a uint64_t because we can't assume the pointer width - // of the target is the same as that of the host. Just use a generic - // "big enough" type. - assert(0); + DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID + << " Rel.Offset: " << Rel.Offset + << "\n"); + if (Arch == Triple::arm && + (RelType == ELF::R_ARM_PC24 || + RelType == ELF::R_ARM_CALL || + RelType == ELF::R_ARM_JUMP24)) { + // This is an ARM branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is an ARM branch relocation."); + SectionEntry &Section = Sections[Rel.SectionID]; + uint8_t *Target = Section.Address + Rel.Offset; + + // Look up for existing stub. 
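
The ARM branch relocations special-cased here (R_ARM_PC24, R_ARM_CALL, R_ARM_JUMP24) encode a signed 24-bit word offset from PC+8, which only reaches about +/-32 MB; that limited range is why out-of-range targets are routed through the far stub sketched earlier. A small sketch of the in-range encoding, matching what resolveARMRelocation does above and assuming 4-byte-aligned addresses in ARM mode (the helper name is illustrative):

    #include <cassert>
    #include <cstdint>

    // Patch the low 24 bits of an ARM B/BL instruction with the word offset
    // from the branch (PC+8) to Target. The offset is counted in words.
    static void encodeARMBranch(uint32_t *Insn, uint32_t BranchAddr,
                                uint32_t Target) {
      int32_t Offset = static_cast<int32_t>(Target - BranchAddr - 8);
      assert(Offset >= -(1 << 25) && Offset < (1 << 25) &&
             "branch target out of range; a far stub is required");
      uint32_t Imm24 = (static_cast<uint32_t>(Offset) >> 2) & 0x00FFFFFF;
      *Insn = (*Insn & 0xFF000000) | Imm24;  // keep cond/opcode bits, set offset
    }
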
+ StubMap::const_iterator stubIt = Stubs.find(Value); + if (stubIt != Stubs.end()) { + resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address + + stubIt->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + AddRelocation(Value, Rel.SectionID, + StubTargetAddr - Section.Address, ELF::R_ARM_ABS32); + resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address + + Section.StubOffset, RelType, 0); + Section.StubOffset += getMaxStubSize(); + } + } else + AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType); } bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index e0f7d54..36566da 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -21,158 +21,42 @@ using namespace llvm; namespace llvm { class RuntimeDyldELF : public RuntimeDyldImpl { - // For each symbol, keep a list of relocations based on it. Anytime - // its address is reassigned (the JIT re-compiled the function, e.g.), - // the relocations get re-resolved. - struct RelocationEntry { - // Function or section this relocation is contained in. - std::string Target; - // Offset into the target function or section for the relocation. - uint32_t Offset; - // Relocation type - uint32_t Type; - // Addend encoded in the instruction itself, if any. - int32_t Addend; - // Has the relocation been recalcuated as an offset within a function? - bool IsFunctionRelative; - // Has this relocation been resolved previously? - bool isResolved; - - RelocationEntry(StringRef t, - uint32_t offset, - uint32_t type, - int32_t addend, - bool isFunctionRelative) - : Target(t) - , Offset(offset) - , Type(type) - , Addend(addend) - , IsFunctionRelative(isFunctionRelative) - , isResolved(false) { } - }; - typedef SmallVector<RelocationEntry, 4> RelocationList; - StringMap<RelocationList> Relocations; - unsigned Arch; - - void resolveRelocations(); - - void resolveX86_64Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveX86Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveArmRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - -public: - RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} - - bool loadObject(MemoryBuffer *InputBuffer); - - void reassignSymbolAddress(StringRef Name, uint8_t *Addr); - void reassignSectionAddress(unsigned SectionID, uint64_t Addr); - - bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const; -}; - -} // end namespace llvm - -#endif - -//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ELF support for MC-JIT runtime dynamic linker. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_RUNTIME_DYLD_ELF_H -#define LLVM_RUNTIME_DYLD_ELF_H - -#include "RuntimeDyldImpl.h" - -using namespace llvm; - - -namespace llvm { -class RuntimeDyldELF : public RuntimeDyldImpl { - // For each symbol, keep a list of relocations based on it. Anytime - // its address is reassigned (the JIT re-compiled the function, e.g.), - // the relocations get re-resolved. - struct RelocationEntry { - // Function or section this relocation is contained in. - std::string Target; - // Offset into the target function or section for the relocation. - uint32_t Offset; - // Relocation type - uint32_t Type; - // Addend encoded in the instruction itself, if any. - int32_t Addend; - // Has the relocation been recalcuated as an offset within a function? - bool IsFunctionRelative; - // Has this relocation been resolved previously? - bool isResolved; - - RelocationEntry(StringRef t, - uint32_t offset, - uint32_t type, - int32_t addend, - bool isFunctionRelative) - : Target(t) - , Offset(offset) - , Type(type) - , Addend(addend) - , IsFunctionRelative(isFunctionRelative) - , isResolved(false) { } - }; - typedef SmallVector<RelocationEntry, 4> RelocationList; - StringMap<RelocationList> Relocations; - unsigned Arch; - - void resolveRelocations(); - - void resolveX86_64Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveX86Relocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveArmRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); - - void resolveRelocation(StringRef Name, - uint8_t *Addr, - const RelocationEntry &RE); +protected: + void resolveX86_64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend); + + void resolveX86Relocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend); + + void resolveARMRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend); + + virtual void resolveRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend); + + virtual void processRelocationRef(const ObjRelocationInfo &Rel, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, StubMap &Stubs); public: RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} - bool loadObject(MemoryBuffer *InputBuffer); - - void reassignSymbolAddress(StringRef Name, uint8_t *Addr); - void reassignSectionAddress(unsigned SectionID, uint64_t Addr); - bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const; }; } // end namespace llvm -#endif - +#endif diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 28e99be..d6430a9 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -15,45 +15,125 @@ #define LLVM_RUNTIME_DYLD_IMPL_H #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/system_error.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Triple.h" +#include <map> +#include "llvm/Support/Format.h" using namespace llvm; +using namespace llvm::object; namespace llvm { + +class SectionEntry { +public: + uint8_t* Address; + size_t Size; + uint64_t LoadAddress; // For each section, the address it will be + // considered to live at for relocations. The same + // as the pointer to the above memory block for + // hosted JITs. + uintptr_t StubOffset; // It's used for architecturies with stub + // functions for far relocations like ARM. + uintptr_t ObjAddress; // Section address in object file. It's use for + // calculate MachO relocation addend + SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset, + uintptr_t objAddress) + : Address(address), Size(size), LoadAddress((uintptr_t)address), + StubOffset(stubOffset), ObjAddress(objAddress) {} +}; + +class RelocationEntry { +public: + unsigned SectionID; // Section the relocation is contained in. + uintptr_t Offset; // Offset into the section for the relocation. + uint32_t Data; // Relocatino data. Including type of relocation + // and another flags and parameners from + intptr_t Addend; // Addend encoded in the instruction itself, if any, + // plus the offset into the source section for + // the symbol once the relocation is resolvable. + RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend) + : SectionID(id), Offset(offset), Data(data), Addend(addend) {} +}; + +// Raw relocation data from object file +class ObjRelocationInfo { +public: + unsigned SectionID; + uint64_t Offset; + SymbolRef Symbol; + uint64_t Type; + int64_t AdditionalInfo; +}; + +class RelocationValueRef { +public: + unsigned SectionID; + intptr_t Addend; + const char *SymbolName; + RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {} + + inline bool operator==(const RelocationValueRef &Other) const { + return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0; + } + inline bool operator <(const RelocationValueRef &Other) const { + return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0; + } +}; + class RuntimeDyldImpl { protected: - unsigned CPUType; - unsigned CPUSubtype; - // The MemoryManager to load objects into. RTDyldMemoryManager *MemMgr; - // For each section, we have a MemoryBlock of it's data. - // Indexed by SectionID. - SmallVector<sys::MemoryBlock, 32> Sections; - // For each section, the address it will be considered to live at for - // relocations. The same as the pointer to the above memory block for hosted - // JITs. Indexed by SectionID. - SmallVector<uint64_t, 32> SectionLoadAddress; + // A list of emmitted sections. + typedef SmallVector<SectionEntry, 64> SectionList; + SectionList Sections; - // Keep a map of starting local address to the SectionID which references it. - // Lookup function for when we assign virtual addresses. - DenseMap<void *, unsigned> SectionLocalMemToID; + // Keep a map of sections from object file to the SectionID which + // references it. + typedef std::map<SectionRef, unsigned> ObjSectionToIDMap; // Master symbol table. As modules are loaded and external symbols are // resolved, their addresses are stored here as a SectionID/Offset pair. - typedef std::pair<unsigned, uint64_t> SymbolLoc; + typedef std::pair<unsigned, uintptr_t> SymbolLoc; StringMap<SymbolLoc> SymbolTable; + typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap; + + // For each symbol, keep a list of relocations based on it. 
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  // The symbol (or section) the relocation is sourced from is the key
+  // under which the list is stored.
+  typedef SmallVector<RelocationEntry, 64> RelocationList;
+  // Relocations to sections already loaded. Indexed by SectionID which is the
+  // source of the address. The target where the address will be written is
+  // SectionID/Offset in the relocation itself.
+  DenseMap<unsigned, RelocationList> Relocations;
+  // Relocations to external symbols that are not yet resolved.
+  // Indexed by symbol name.
+  StringMap<RelocationList> SymbolRelocations;
+
+  typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+  Triple::ArchType Arch;
+
+  inline unsigned getMaxStubSize() {
+    if (Arch == Triple::arm || Arch == Triple::thumb)
+      return 8; // 32-bit instruction and 32-bit address
+    else
+      return 0;
+  }
 
   bool HasError;
   std::string ErrorStr;
@@ -66,17 +146,62 @@ protected:
   }
 
   uint8_t *getSectionAddress(unsigned SectionID) {
-    return (uint8_t*)Sections[SectionID].base();
+    return (uint8_t*)Sections[SectionID].Address;
   }
 
-  void extractFunction(StringRef Name, uint8_t *StartAddress,
-                       uint8_t *EndAddress);
+  /// \brief Emits section data from the object file to the MemoryManager.
+  /// \param IsCode If true, allocateCodeSection() is used for the allocation;
+  ///        otherwise allocateDataSection() is used.
+  /// \return SectionID.
+  unsigned emitSection(const SectionRef &Section, bool IsCode);
+
+  /// \brief Find Section in LocalSections. If the section is not found, emit
+  ///        it and store it in LocalSections.
+  /// \param IsCode If true, allocateCodeSection() is used for the allocation;
+  ///        otherwise allocateDataSection() is used.
+  /// \return SectionID.
+  unsigned findOrEmitSection(const SectionRef &Section, bool IsCode,
+                             ObjSectionToIDMap &LocalSections);
+
+  /// \brief If Value.SymbolName is NULL, store the relocation in Relocations;
+  ///        otherwise store it in SymbolRelocations.
+  void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
+                     uintptr_t Offset, uint32_t RelType);
+
+  /// \brief Emits a far-jump stub instruction at Addr.
+  /// \return Pointer to the memory slot where the target address should be
+  ///         written.
+  uint8_t* createStubFunction(uint8_t *Addr);
+
+  /// \brief Resolves the relocations in Relocs using the address in Value.
+  void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+  void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+  /// \brief An object-format-specific relocation resolver.
+  /// \param LocalAddress The address, in host memory, at which to apply the
+  ///        relocation
+  /// \param FinalAddress The address the relocated word will have in the
+  ///        target process
+  /// \param Value Target symbol address to apply the relocation action
+  /// \param Type Object-file-specific relocation type
+  /// \param Addend A constant addend used to compute the value to be stored
+  ///        into the relocatable field
+  virtual void resolveRelocation(uint8_t *LocalAddress,
+                                 uint64_t FinalAddress,
+                                 uint64_t Value,
+                                 uint32_t Type,
+                                 int64_t Addend) = 0;
+
+  /// \brief Parses an object file relocation and stores it in Relocations or
+  ///        SymbolRelocations. The handling depends on the object file format.
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, StubMap &Stubs) = 0; + + void resolveSymbols(); public: RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} virtual ~RuntimeDyldImpl(); - virtual bool loadObject(MemoryBuffer *InputBuffer) = 0; + bool loadObject(const MemoryBuffer *InputBuffer); void *getSymbolAddress(StringRef Name) { // FIXME: Just look up as a function for now. Overly simple of course. @@ -87,9 +212,9 @@ public: return getSectionAddress(Loc.first) + Loc.second; } - virtual void resolveRelocations(); + void resolveRelocations(); - virtual void reassignSectionAddress(unsigned SectionID, uint64_t Addr) = 0; + void reassignSectionAddress(unsigned SectionID, uint64_t Addr); void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress); @@ -103,6 +228,7 @@ public: StringRef getErrorString() { return ErrorStr; } virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0; + }; } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index c11b2c3..24437e0 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -21,30 +21,64 @@ using namespace llvm::object; namespace llvm { -bool RuntimeDyldMachO:: -resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend) { +void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + bool isPCRel = (Type >> 24) & 1; + unsigned MachoType = (Type >> 28) & 0xf; + unsigned Size = 1 << ((Type >> 25) & 3); + + DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) + << " FinalAddress: " << format("%p", FinalAddress) + << " Value: " << format("%p", Value) + << " Addend: " << Addend + << " isPCRel: " << isPCRel + << " MachoType: " << MachoType + << " Size: " << Size + << "\n"); + // This just dispatches to the proper target specific routine. - switch (CPUType) { + switch (Arch) { default: llvm_unreachable("Unsupported CPU type!"); - case mach::CTM_x86_64: - return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value, - isPCRel, Type, Size, Addend); - case mach::CTM_ARM: - return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value, - isPCRel, Type, Size, Addend); + case Triple::x86_64: // Fall through. + case Triple::x86: + resolveX86_64Relocation(LocalAddress, + FinalAddress, + (uintptr_t)Value, + isPCRel, + MachoType, + Size, + Addend); + break; + case Triple::arm: // Fall through. + case Triple::thumb: + resolveARMRelocation(LocalAddress, + FinalAddress, + (uintptr_t)Value, + isPCRel, + MachoType, + Size, + Addend); + break; } } bool RuntimeDyldMachO:: -resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend) { +resolveX86_64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (isPCRel) // FIXME: It seems this value needs to be adjusted by 4 for an effective PC // address. Is that expected? Only for branches, perhaps? 
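
The packed Type value handed to the MachO resolver above is the second word of a Mach-O relocation entry, so the PC-relative flag, the log2 length, and the relocation kind are all recovered by shifting, exactly as resolveRelocation does before dispatching to the per-architecture routines. A small sketch of that unpacking (the struct and field names here are illustrative):

    #include <cstdint>

    struct MachORelocInfo {
      bool     IsPCRel;  // bit 24 of the packed word
      unsigned Size;     // 1, 2, 4 or 8 bytes (bits 25-26 hold log2 of the size)
      unsigned Kind;     // relocation type (bits 28-31)
    };

    static MachORelocInfo unpackRelocWord(uint32_t Word) {
      MachORelocInfo R;
      R.IsPCRel = (Word >> 24) & 1;
      R.Size    = 1u << ((Word >> 25) & 3);
      R.Kind    = (Word >> 28) & 0xf;
      return R;
    }
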
- Value -= Address + 4; + Value -= FinalAddress + 4; switch(Type) { default: @@ -58,7 +92,7 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, Value += Addend; // Mask in the target value a byte at a time (we don't have an alignment // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t*)Address; + uint8_t *p = (uint8_t*)LocalAddress; for (unsigned i = 0; i < Size; ++i) { *p++ = (uint8_t)Value; Value >>= 8; @@ -74,12 +108,17 @@ resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, } bool RuntimeDyldMachO:: -resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend) { +resolveARMRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (isPCRel) { - Value -= Address; + Value -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). // FIXME: For now, assume ARM mode. @@ -92,7 +131,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, case macho::RIT_Vanilla: { // Mask in the target value a byte at a time (we don't have an alignment // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t*)Address; + uint8_t *p = (uint8_t*)LocalAddress; for (unsigned i = 0; i < Size; ++i) { *p++ = (uint8_t)Value; Value >>= 8; @@ -102,7 +141,7 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, case macho::RIT_ARM_Branch24Bit: { // Mask the value into the target address. We know instructions are // 32-bit aligned, so we can do it all at once. - uint32_t *p = (uint32_t*)Address; + uint32_t *p = (uint32_t*)LocalAddress; // The low two bits of the value are not encoded. Value >>= 2; // Mask the value to 24 bits. @@ -128,463 +167,83 @@ resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, return false; } -bool RuntimeDyldMachO:: -loadSegment32(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - // FIXME: This should really be combined w/ loadSegment64. Templatized - // function on the 32/64 datatypes maybe? - InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; - Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC); - if (!SegmentLC) - return Error("unable to load segment load command"); - - - SmallVector<unsigned, 16> SectionMap; - for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section> Sect; - Obj->ReadSection(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - - // Allocate memory via the MM for the section. - uint8_t *Buffer; - uint32_t SectionID = Sections.size(); - if (Sect->Flags == 0x80000400) - Buffer = MemMgr->allocateCodeSection(Sect->Size, Sect->Align, SectionID); - else - Buffer = MemMgr->allocateDataSection(Sect->Size, Sect->Align, SectionID); - - DEBUG(dbgs() << "Loading " - << ((Sect->Flags == 0x80000400) ? "text" : "data") - << " (ID #" << SectionID << ")" - << " '" << Sect->SegmentName << "," - << Sect->Name << "' of size " << Sect->Size - << " to address " << Buffer << ".\n"); - - // Copy the payload from the object file into the allocated buffer. 
- uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset, - SegmentLC->FileSize).data(); - memcpy(Buffer, Base + Sect->Address, Sect->Size); - - // Remember what got allocated for this SectionID. - Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size)); - SectionLocalMemToID[Buffer] = SectionID; - - // By default, the load address of a section is its memory buffer. - SectionLoadAddress.push_back((uint64_t)Buffer); - - // Keep a map of object file section numbers to corresponding SectionIDs - // while processing the file. - SectionMap.push_back(SectionID); - } - - // Process the symbol table. - SmallVector<StringRef, 64> SymbolNames; - processSymbols32(Obj, SectionMap, SymbolNames, SymtabLC); - - // Process the relocations for each section we're loading. - Relocations.grow(Relocations.size() + SegmentLC->NumSections); - for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section> Sect; - Obj->ReadSection(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { - InMemoryStruct<macho::RelocationEntry> RE; - Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); - if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); - // Word0 of the relocation is the offset into the section where the - // relocation should be applied. We need to translate that into an - // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; - bool isExtern = (RE->Word1 >> 27) & 1; - - // FIXME: Get the relocation addend from the target address. - // FIXME: VERY imporant for internal relocations. - - // Figure out the source symbol of the relocation. If isExtern is true, - // this relocation references the symbol table, otherwise it references - // a section in the same object, numbered from 1 through NumSections - // (SectionBases is [0, NumSections-1]). - uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value - if (!isExtern) { - assert(SourceNum > 0 && "Invalid relocation section number!"); - unsigned SectionID = SectionMap[SourceNum - 1]; - unsigned TargetID = SectionMap[SectNum]; - DEBUG(dbgs() << "Internal relocation at Section #" - << TargetID << " + " << Offset - << " from Section #" - << SectionID << " (Word1: " - << format("0x%x", RE->Word1) << ")\n"); - - // Store the relocation information. It will get resolved when - // the section addresses are assigned. - Relocations[SectionID].push_back(RelocationEntry(TargetID, - Offset, - RE->Word1, - 0 /*Addend*/)); - } else { - StringRef SourceName = SymbolNames[SourceNum]; - - // Now store the relocation information. Associate it with the source - // symbol. Just add it to the unresolved list and let the general - // path post-load resolve it if we know where the symbol is. 
- UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset - << " from '" << SourceName << "(Word1: " - << format("0x%x", RE->Word1) << ")\n"); - } +void RuntimeDyldMachO:: +processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, StubMap &Stubs) { + + uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); + RelocationValueRef Value; + SectionEntry &Section = Sections[Rel.SectionID]; + uint8_t *Target = Section.Address + Rel.Offset; + + bool isExtern = (RelType >> 27) & 1; + if (isExtern) { + StringRef TargetName; + const SymbolRef &Symbol = Rel.Symbol; + Symbol.getName(TargetName); + // First look the symbol in object file symbols. + LocalSymbolMap::iterator it = Symbols.find(TargetName.data()); + if (it != Symbols.end()) { + Value.SectionID = it->second.first; + Value.Addend = it->second.second; + } else { + // Second look the symbol in global symbol table. + StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data()); + if (itS != SymbolTable.end()) { + Value.SectionID = itS->second.first; + Value.Addend = itS->second.second; + } else + Value.SymbolName = TargetName.data(); } - } - - // Resolve the addresses of any symbols that were defined in this segment. - for (int i = 0, e = SymbolNames.size(); i != e; ++i) - resolveSymbol(SymbolNames[i]); - - return false; -} - - -bool RuntimeDyldMachO:: -loadSegment64(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; - Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC); - if (!Segment64LC) - return Error("unable to load segment load command"); - - - SmallVector<unsigned, 16> SectionMap; - for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section64> Sect; - Obj->ReadSection64(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - - // Allocate memory via the MM for the section. - uint8_t *Buffer; - uint32_t SectionID = Sections.size(); - unsigned Align = 1 << Sect->Align; // .o file has log2 alignment. - if (Sect->Flags == 0x80000400) - Buffer = MemMgr->allocateCodeSection(Sect->Size, Align, SectionID); - else - Buffer = MemMgr->allocateDataSection(Sect->Size, Align, SectionID); - - DEBUG(dbgs() << "Loading " - << ((Sect->Flags == 0x80000400) ? "text" : "data") - << " (ID #" << SectionID << ")" - << " '" << Sect->SegmentName << "," - << Sect->Name << "' of size " << Sect->Size - << " (align " << Align << ")" - << " to address " << Buffer << ".\n"); - - // Copy the payload from the object file into the allocated buffer. - uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset, - Segment64LC->FileSize).data(); - memcpy(Buffer, Base + Sect->Address, Sect->Size); - - // Remember what got allocated for this SectionID. - Sections.push_back(sys::MemoryBlock(Buffer, Sect->Size)); - SectionLocalMemToID[Buffer] = SectionID; - - // By default, the load address of a section is its memory buffer. - SectionLoadAddress.push_back((uint64_t)Buffer); - - // Keep a map of object file section numbers to corresponding SectionIDs - // while processing the file. - SectionMap.push_back(SectionID); - } - - // Process the symbol table. 
- SmallVector<StringRef, 64> SymbolNames; - processSymbols64(Obj, SectionMap, SymbolNames, SymtabLC); - - // Process the relocations for each section we're loading. - Relocations.grow(Relocations.size() + Segment64LC->NumSections); - for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { - InMemoryStruct<macho::Section64> Sect; - Obj->ReadSection64(*SegmentLCI, SectNum, Sect); - if (!Sect) - return Error("unable to load section: '" + Twine(SectNum) + "'"); - for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) { - InMemoryStruct<macho::RelocationEntry> RE; - Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE); - if (RE->Word0 & macho::RF_Scattered) - return Error("NOT YET IMPLEMENTED: scattered relocations."); - // Word0 of the relocation is the offset into the section where the - // relocation should be applied. We need to translate that into an - // offset into a function since that's our atom. - uint32_t Offset = RE->Word0; - bool isExtern = (RE->Word1 >> 27) & 1; - - // FIXME: Get the relocation addend from the target address. - // FIXME: VERY imporant for internal relocations. - - // Figure out the source symbol of the relocation. If isExtern is true, - // this relocation references the symbol table, otherwise it references - // a section in the same object, numbered from 1 through NumSections - // (SectionBases is [0, NumSections-1]). - uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value - if (!isExtern) { - assert(SourceNum > 0 && "Invalid relocation section number!"); - unsigned SectionID = SectionMap[SourceNum - 1]; - unsigned TargetID = SectionMap[SectNum]; - DEBUG(dbgs() << "Internal relocation at Section #" - << TargetID << " + " << Offset - << " from Section #" - << SectionID << " (Word1: " - << format("0x%x", RE->Word1) << ")\n"); - - // Store the relocation information. It will get resolved when - // the section addresses are assigned. - Relocations[SectionID].push_back(RelocationEntry(TargetID, - Offset, - RE->Word1, - 0 /*Addend*/)); - } else { - StringRef SourceName = SymbolNames[SourceNum]; - - // Now store the relocation information. Associate it with the source - // symbol. Just add it to the unresolved list and let the general - // path post-load resolve it if we know where the symbol is. - UnresolvedRelocations[SourceName].push_back(RelocationEntry(SectNum, - Offset, - RE->Word1, - 0 /*Addend*/)); - DEBUG(dbgs() << "Relocation at Section #" << SectNum << " + " << Offset - << " from '" << SourceName << "(Word1: " - << format("0x%x", RE->Word1) << ")\n"); - } + } else { + error_code err; + uint8_t sIdx = static_cast<uint8_t>(RelType & 0xFF); + section_iterator sIt = Obj.begin_sections(), + sItEnd = Obj.end_sections(); + for (uint8_t i = 1; i < sIdx; i++) { + error_code err; + sIt.increment(err); + if (sIt == sItEnd) + break; } - } - - // Resolve the addresses of any symbols that were defined in this segment. - for (int i = 0, e = SymbolNames.size(); i != e; ++i) - resolveSymbol(SymbolNames[i]); - - return false; -} - -bool RuntimeDyldMachO:: -processSymbols32(const MachOObject *Obj, - SmallVectorImpl<unsigned> &SectionMap, - SmallVectorImpl<StringRef> &SymbolNames, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - // FIXME: Combine w/ processSymbols64. Factor 64/32 datatype and such. 
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { - InMemoryStruct<macho::SymbolTableEntry> STE; - Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); - if (!STE) - return Error("unable to read symbol: '" + Twine(i) + "'"); - // Get the symbol name. - StringRef Name = Obj->getStringAtIndex(STE->StringIndex); - SymbolNames.push_back(Name); - - // FIXME: Check the symbol type and flags. - if (STE->Type != 0xF) // external, defined in this segment. - continue; - // Flags in the upper nibble we don't care about. - if ((STE->Flags & 0xf) != 0x0) - continue; - - // Remember the symbol. - uint32_t SectionID = SectionMap[STE->SectionIndex - 1]; - SymbolTable[Name] = SymbolLoc(SectionID, STE->Value); - - DEBUG(dbgs() << "Symbol: '" << Name << "' @ " - << (getSectionAddress(SectionID) + STE->Value) - << "\n"); - } - return false; -} - -bool RuntimeDyldMachO:: -processSymbols64(const MachOObject *Obj, - SmallVectorImpl<unsigned> &SectionMap, - SmallVectorImpl<StringRef> &SymbolNames, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) { - for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { - InMemoryStruct<macho::Symbol64TableEntry> STE; - Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); - if (!STE) - return Error("unable to read symbol: '" + Twine(i) + "'"); - // Get the symbol name. - StringRef Name = Obj->getStringAtIndex(STE->StringIndex); - SymbolNames.push_back(Name); - - // FIXME: Check the symbol type and flags. - if (STE->Type != 0xF) // external, defined in this segment. - continue; - // Flags in the upper nibble we don't care about. - if ((STE->Flags & 0xf) != 0x0) - continue; - - // Remember the symbol. - uint32_t SectionID = SectionMap[STE->SectionIndex - 1]; - SymbolTable[Name] = SymbolLoc(SectionID, STE->Value); - - DEBUG(dbgs() << "Symbol: '" << Name << "' @ " - << (getSectionAddress(SectionID) + STE->Value) - << "\n"); - } - return false; -} - -// resolveSymbol - Resolve any relocations to the specified symbol if -// we know where it lives. -void RuntimeDyldMachO::resolveSymbol(StringRef Name) { - StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name); - if (Loc == SymbolTable.end()) - return; - - RelocationList &Relocs = UnresolvedRelocations[Name]; - DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n"); - for (int i = 0, e = Relocs.size(); i != e; ++i) { - // Change the relocation to be section relative rather than symbol - // relative and move it to the resolved relocation list. - RelocationEntry Entry = Relocs[i]; - Entry.Addend += Loc->second.second; - Relocations[Loc->second.first].push_back(Entry); - } - // FIXME: Keep a worklist of the relocations we've added so that we can - // resolve more selectively later. - Relocs.clear(); -} - -bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) { - // If the linker is in an error state, don't do anything. - if (hasError()) - return true; - // Load the Mach-O wrapper object. - std::string ErrorStr; - OwningPtr<MachOObject> Obj( - MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr)); - if (!Obj) - return Error("unable to load object: '" + ErrorStr + "'"); - - // Get the CPU type information from the header. - const macho::Header &Header = Obj->getHeader(); - - // FIXME: Error checking that the loaded object is compatible with - // the system we're running on. - CPUType = Header.CPUType; - CPUSubtype = Header.CPUSubtype; - - // Validate that the load commands match what we expect. 
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0, - *DysymtabLCI = 0; - for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { - const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i); - switch (LCI.Command.Type) { - case macho::LCT_Segment: - case macho::LCT_Segment64: - if (SegmentLCI) - return Error("unexpected input object (multiple segments)"); - SegmentLCI = &LCI; - break; - case macho::LCT_Symtab: - if (SymtabLCI) - return Error("unexpected input object (multiple symbol tables)"); - SymtabLCI = &LCI; - break; - case macho::LCT_Dysymtab: - if (DysymtabLCI) - return Error("unexpected input object (multiple symbol tables)"); - DysymtabLCI = &LCI; - break; - default: - return Error("unexpected input object (unexpected load command"); + assert(sIt != sItEnd && "No section containing relocation!"); + Value.SectionID = findOrEmitSection(*sIt, true, ObjSectionToID); + Value.Addend = *(const intptr_t *)Target; + if (Value.Addend) { + // The MachO addend is offset from the current section, we need set it + // as offset from destination section + Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress; } } - if (!SymtabLCI) - return Error("no symbol table found in object"); - if (!SegmentLCI) - return Error("no segments found in object"); - - // Read and register the symbol table data. - InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; - Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); - if (!SymtabLC) - return Error("unable to load symbol table load command"); - Obj->RegisterStringTable(*SymtabLC); - - // Read the dynamic link-edit information, if present (not present in static - // objects). - if (DysymtabLCI) { - InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC; - Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC); - if (!DysymtabLC) - return Error("unable to load dynamic link-exit load command"); - - // FIXME: We don't support anything interesting yet. -// if (DysymtabLC->LocalSymbolsIndex != 0) -// return Error("NOT YET IMPLEMENTED: local symbol entries"); -// if (DysymtabLC->ExternalSymbolsIndex != 0) -// return Error("NOT YET IMPLEMENTED: non-external symbol entries"); -// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries) -// return Error("NOT YET IMPLEMENTED: undefined symbol entries"); - } - - // Load the segment load command. - if (SegmentLCI->Command.Type == macho::LCT_Segment) { - if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC)) - return true; - } else { - if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC)) - return true; - } - - // Assign the addresses of the sections from the object so that any - // relocations to them get set properly. - // FIXME: This is done directly from the client at the moment. We should - // default the values to the local storage, at least when the target arch - // is the same as the host arch. - - return false; + if (Arch == Triple::arm && RelType == macho::RIT_ARM_Branch24Bit) { + // This is an ARM branch relocation, need to use a stub function. + + // Look up for existing stub. + StubMap::const_iterator stubIt = Stubs.find(Value); + if (stubIt != Stubs.end()) + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + stubIt->second, + RelType, 0); + else { + // Create a new stub function. 
+ Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address, + macho::RIT_Vanilla); + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); + Section.StubOffset += getMaxStubSize(); + } + } else + AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType); } -// Assign an address to a symbol name and resolve all the relocations -// associated with it. -void RuntimeDyldMachO::reassignSectionAddress(unsigned SectionID, - uint64_t Addr) { - // The address to use for relocation resolution is not - // the address of the local section buffer. We must be doing - // a remote execution environment of some sort. Re-apply any - // relocations referencing this section with the given address. - // - // Addr is a uint64_t because we can't assume the pointer width - // of the target is the same as that of the host. Just use a generic - // "big enough" type. - - SectionLoadAddress[SectionID] = Addr; - - RelocationList &Relocs = Relocations[SectionID]; - for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - RelocationEntry &RE = Relocs[i]; - uint8_t *Target = (uint8_t*)Sections[RE.SectionID].base() + RE.Offset; - bool isPCRel = (RE.Data >> 24) & 1; - unsigned Type = (RE.Data >> 28) & 0xf; - unsigned Size = 1 << ((RE.Data >> 25) & 3); - - DEBUG(dbgs() << "Resolving relocation at Section #" << RE.SectionID - << " + " << RE.Offset << " (" << format("%p", Target) << ")" - << " from Section #" << SectionID << " (" << format("%p", Addr) << ")" - << "(" << (isPCRel ? "pcrel" : "absolute") - << ", type: " << Type << ", Size: " << Size << ", Addend: " - << RE.Addend << ").\n"); - - resolveRelocation(Target, Addr, isPCRel, Type, Size, RE.Addend); - } -} -bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) { +bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const { StringRef Magic = InputBuffer->getBuffer().slice(0, 4); if (Magic == "\xFE\xED\xFA\xCE") return true; if (Magic == "\xCE\xFA\xED\xFE") return true; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 5798981..36b39dd 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -25,68 +25,37 @@ using namespace llvm::object; namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { - - // For each symbol, keep a list of relocations based on it. Anytime - // its address is reassigned (the JIT re-compiled the function, e.g.), - // the relocations get re-resolved. - // The symbol (or section) the relocation is sourced from is the Key - // in the relocation list where it's stored. - struct RelocationEntry { - unsigned SectionID; // Section the relocation is contained in. - uint64_t Offset; // Offset into the section for the relocation. - uint32_t Data; // Second word of the raw macho relocation entry. - int64_t Addend; // Addend encoded in the instruction itself, if any, - // plus the offset into the source section for - // the symbol once the relocation is resolvable. - - RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend) - : SectionID(id), Offset(offset), Data(data), Addend(addend) {} - }; - typedef SmallVector<RelocationEntry, 4> RelocationList; - // Relocations to sections already loaded. Indexed by SectionID which is the - // source of the address. 
The target where the address will be writen is - // SectionID/Offset in the relocation itself. - IndexedMap<RelocationList> Relocations; - // Relocations to symbols that are not yet resolved. Must be external - // relocations by definition. Indexed by symbol name. - StringMap<RelocationList> UnresolvedRelocations; - - bool resolveRelocation(uint8_t *Address, uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend); - bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend); - bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel, - unsigned Type, unsigned Size, int64_t Addend); - - bool loadSegment32(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - bool loadSegment64(const MachOObject *Obj, - const MachOObject::LoadCommandInfo *SegmentLCI, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - bool processSymbols32(const MachOObject *Obj, - SmallVectorImpl<unsigned> &SectionMap, - SmallVectorImpl<StringRef> &SymbolNames, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - bool processSymbols64(const MachOObject *Obj, - SmallVectorImpl<unsigned> &SectionMap, - SmallVectorImpl<StringRef> &SymbolNames, - const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC); - - void resolveSymbol(StringRef Name); +protected: + bool resolveX86_64Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend); + bool resolveARMRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend); + + virtual void processRelocationRef(const ObjRelocationInfo &Rel, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, StubMap &Stubs); public: + virtual void resolveRelocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + uint32_t Type, + int64_t Addend); + RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} - bool loadObject(MemoryBuffer *InputBuffer); - - void reassignSectionAddress(unsigned SectionID, uint64_t Addr); - - static bool isKnownFormat(const MemoryBuffer *InputBuffer); - - bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const { - return isKnownFormat(InputBuffer); - } + bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const; }; } // end namespace llvm diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index bd5956f..ab5ddaf 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1528,6 +1528,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, } Lex(); } + // If there weren't any arguments, erase the token vector so everything + // else knows that. Leaving around the vestigal empty token list confuses + // things. + if (MacroArguments.size() == 1 && MacroArguments.back().empty()) + MacroArguments.clear(); // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. @@ -1624,6 +1629,8 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) { return Error(EqualLoc, "Recursive use of '" + Name + "'"); else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. 
+ else if (Sym->isVariable() && !Sym->isUsed() && allow_redef) + ; // Allow redefinitions of variables that haven't yet been used. else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef)) return Error(EqualLoc, "redefinition of '" + Name + "'"); else if (!Sym->isVariable()) diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index c2fad167..e013e77 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -54,17 +54,14 @@ const MCSymbol &MCSymbol::AliasedSymbol() const { void MCSymbol::setVariableValue(const MCExpr *Value) { assert(!IsUsed && "Cannot set a variable that has already been used."); assert(Value && "Invalid variable value!"); - assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) && - "Invalid redefinition!"); this->Value = Value; // Variables should always be marked as in the same "section" as the value. const MCSection *Section = Value->FindAssociatedSection(); - if (Section) { + if (Section) setSection(*Section); - } else { + else setUndefined(); - } } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 7144e68..f706cac 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -783,9 +783,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, } if (Sec->Relocations.size() > 0) { - Sec->Header.NumberOfRelocations = Sec->Relocations.size(); + bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff; + + if (RelocationsOverflow) { + // Signal overflow by setting NumberOfSections to max value. Actual + // size is found in reloc #0. Microsoft tools understand this. + Sec->Header.NumberOfRelocations = 0xffff; + } else { + Sec->Header.NumberOfRelocations = Sec->Relocations.size(); + } Sec->Header.PointerToRelocations = offset; + if (RelocationsOverflow) { + // Reloc #0 will contain actual count, so make room for it. + offset += COFF::RelocationSize; + } + offset += COFF::RelocationSize * Sec->Relocations.size(); for (relocations::iterator cr = Sec->Relocations.begin(), @@ -820,8 +833,12 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, MCAssembler::const_iterator j, je; for (i = Sections.begin(), ie = Sections.end(); i != ie; i++) - if ((*i)->Number != -1) + if ((*i)->Number != -1) { + if ((*i)->Relocations.size() >= 0xffff) { + (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; + } WriteSectionHeader((*i)->Header); + } for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(), je = Asm.end(); @@ -841,6 +858,16 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, assert(OS.tell() == (*i)->Header.PointerToRelocations && "Section::PointerToRelocations is insane!"); + if ((*i)->Relocations.size() >= 0xffff) { + // In case of overflow, write actual relocation count as first + // relocation. Including the synthetic reloc itself (+ 1). + COFF::relocation r; + r.VirtualAddress = (*i)->Relocations.size() + 1; + r.SymbolTableIndex = 0; + r.Type = 0; + WriteRelocation(r); + } + for (relocations::const_iterator k = (*i)->Relocations.begin(), ke = (*i)->Relocations.end(); k != ke; k++) { diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index b67377c..c5f15ba 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -174,7 +174,7 @@ error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const { } Archive::Archive(MemoryBuffer *source, error_code &ec) - : Binary(Binary::isArchive, source) { + : Binary(Binary::ID_Archive, source) { // Check for sufficient magic. 
if (!source || source->getBufferSize() < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive. diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index a3fdd5b..b8ba905 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Object/COFF.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" @@ -300,24 +301,7 @@ error_code COFFObjectFile::getSectionNext(DataRefImpl Sec, error_code COFFObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { const coff_section *sec = toSec(Sec); - StringRef name; - if (sec->Name[7] == 0) - // Null terminated, let ::strlen figure out the length. - name = sec->Name; - else - // Not null terminated, use all 8 bytes. - name = StringRef(sec->Name, 8); - - // Check for string table entry. First byte is '/'. - if (name[0] == '/') { - uint32_t Offset; - name.substr(1).getAsInteger(10, Offset); - if (error_code ec = getString(Offset, name)) - return ec; - } - - Result = name; - return object_error::success; + return getSectionName(sec, Result); } error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec, @@ -337,16 +321,10 @@ error_code COFFObjectFile::getSectionSize(DataRefImpl Sec, error_code COFFObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Result) const { const coff_section *sec = toSec(Sec); - // The only thing that we need to verify is that the contents is contained - // within the file bounds. We don't need to make sure it doesn't cover other - // data, as there's nothing that says that is not allowed. - uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData; - uintptr_t con_end = con_start + sec->SizeOfRawData; - if (con_end > uintptr_t(Data->getBufferEnd())) - return object_error::parse_failed; - Result = StringRef(reinterpret_cast<const char*>(con_start), - sec->SizeOfRawData); - return object_error::success; + ArrayRef<uint8_t> Res; + error_code EC = getSectionContents(sec, Res); + Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size()); + return EC; } error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec, @@ -421,7 +399,7 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { } COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) - : ObjectFile(Binary::isCOFF, Object, ec) + : ObjectFile(Binary::ID_COFF, Object, ec) , Header(0) , SectionTable(0) , SymbolTable(0) @@ -630,6 +608,43 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol, return object_error::success; } +error_code COFFObjectFile::getSectionName(const coff_section *Sec, + StringRef &Res) const { + StringRef Name; + if (Sec->Name[7] == 0) + // Null terminated, let ::strlen figure out the length. + Name = Sec->Name; + else + // Not null terminated, use all 8 bytes. + Name = StringRef(Sec->Name, 8); + + // Check for string table entry. First byte is '/'. + if (Name[0] == '/') { + uint32_t Offset; + if (Name.substr(1).getAsInteger(10, Offset)) + return object_error::parse_failed; + if (error_code ec = getString(Offset, Name)) + return ec; + } + + Res = Name; + return object_error::success; +} + +error_code COFFObjectFile::getSectionContents(const coff_section *Sec, + ArrayRef<uint8_t> &Res) const { + // The only thing that we need to verify is that the contents is contained + // within the file bounds. 
We don't need to make sure it doesn't cover other + // data, as there's nothing that says that is not allowed. + uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData; + uintptr_t ConEnd = ConStart + Sec->SizeOfRawData; + if (ConEnd > uintptr_t(Data->getBufferEnd())) + return object_error::parse_failed; + Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart), + Sec->SizeOfRawData); + return object_error::success; +} + const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const { return reinterpret_cast<const coff_relocation*>(Rel.p); } diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 655c40a..819409e 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -30,7 +30,7 @@ namespace object { MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec) - : ObjectFile(Binary::isMachO, Object, ec), + : ObjectFile(Binary::ID_MachO, Object, ec), MachOObj(MOO), RegisteredStringTable(std::numeric_limits<uint32_t>::max()) { DataRefImpl DRI; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 031bbb8..9b81fe7 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -457,16 +457,6 @@ APInt APInt::XorSlowCase(const APInt& RHS) const { return APInt(val, getBitWidth()).clearUnusedBits(); } -bool APInt::operator !() const { - if (isSingleWord()) - return !VAL; - - for (unsigned i = 0; i < getNumWords(); ++i) - if (pVal[i]) - return false; - return true; -} - APInt APInt::operator*(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); if (isSingleWord()) @@ -494,12 +484,6 @@ APInt APInt::operator-(const APInt& RHS) const { return Result.clearUnusedBits(); } -bool APInt::operator[](unsigned bitPosition) const { - assert(bitPosition < getBitWidth() && "Bit position out of bounds!"); - return (maskBit(bitPosition) & - (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0; -} - bool APInt::EqualSlowCase(const APInt& RHS) const { // Get some facts about the number of bits used in the two operands. 
unsigned n1 = getActiveBits(); @@ -722,20 +706,9 @@ unsigned APInt::countLeadingZerosSlowCase() const { return Count; } -static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) { - unsigned Count = 0; - if (skip) - V <<= skip; - while (V && (V & (1ULL << 63))) { - Count++; - V <<= 1; - } - return Count; -} - unsigned APInt::countLeadingOnes() const { if (isSingleWord()) - return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth); + return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth)); unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; unsigned shift; @@ -746,13 +719,13 @@ unsigned APInt::countLeadingOnes() const { shift = APINT_BITS_PER_WORD - highWordBits; } int i = getNumWords() - 1; - unsigned Count = countLeadingOnes_64(pVal[i], shift); + unsigned Count = CountLeadingOnes_64(pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { if (pVal[i] == -1ULL) Count += APINT_BITS_PER_WORD; else { - Count += countLeadingOnes_64(pVal[i], 0); + Count += CountLeadingOnes_64(pVal[i]); break; } } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index d1ec4b0..e6fdf16 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1191,7 +1191,7 @@ printOptionNoValue(const Option &O, size_t GlobalWidth) const { static int OptNameCompare(const void *LHS, const void *RHS) { typedef std::pair<const char *, Option*> pair_ty; - return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); + return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first); } // Copy Options into a vector so we can sort them as we like. diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 0dba28a..32126ec 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -11,12 +11,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" #include "llvm/Config/config.h" using namespace llvm; +static cl::opt<bool> ViewBackground("view-background", cl::Hidden, + cl::desc("Execute graph viewer in the background. Creates tmp file litter.")); + std::string llvm::DOT::EscapeString(const std::string &Label) { std::string Str(Label); for (unsigned i = 0; i != Str.length(); ++i) @@ -49,10 +53,28 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { return Str; } - +// Execute the graph viewer. Return true if successful. +static bool LLVM_ATTRIBUTE_UNUSED +ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args, + const sys::Path &Filename, bool wait, std::string &ErrMsg) { + if (wait) { + if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) { + errs() << "Error: " << ErrMsg << "\n"; + return false; + } + Filename.eraseFromDisk(); + errs() << " done. \n"; + } + else { + sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph file: " << Filename.str() << "\n"; + } + return true; +} void llvm::DisplayGraph(const sys::Path &Filename, bool wait, GraphProgram::Name program) { + wait &= !ViewBackground; std::string ErrMsg; #if HAVE_GRAPHVIZ sys::Path Graphviz(LLVM_PATH_GRAPHVIZ); @@ -61,14 +83,10 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back(Graphviz.c_str()); args.push_back(Filename.c_str()); args.push_back(0); - + errs() << "Running 'Graphviz' program... 
"; - if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) { - errs() << "Error: " << ErrMsg << "\n"; + if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg)) return; - } - Filename.eraseFromDisk(); - errs() << " done. \n"; #elif HAVE_XDOT_PY std::vector<const char*> args; @@ -83,17 +101,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break; default: errs() << "Unknown graph layout name; using default.\n"; } - + args.push_back(0); errs() << "Running 'xdot.py' program... "; - if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY), - &args[0],0,0,0,0,&ErrMsg)) { - errs() << "Error: " << ErrMsg << "\n"; + if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg)) return; - } - Filename.eraseFromDisk(); - errs() << " done. \n"; #elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \ HAVE_TWOPI || HAVE_CIRCO)) @@ -150,14 +163,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back("-o"); args.push_back(PSFilename.c_str()); args.push_back(0); - + errs() << "Running '" << prog.str() << "' program... "; - if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) { - errs() << "Error: " << ErrMsg << "\n"; + if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg)) return; - } - errs() << " done. \n"; sys::Path gv(LLVM_PATH_GV); args.clear(); @@ -165,19 +175,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back(PSFilename.c_str()); args.push_back("--spartan"); args.push_back(0); - + ErrMsg.clear(); - if (wait) { - if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) - errs() << "Error: " << ErrMsg << "\n"; - Filename.eraseFromDisk(); - PSFilename.eraseFromDisk(); - } - else { - sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); - errs() << "Remember to erase graph files: " << Filename.str() << " " - << PSFilename.str() << "\n"; - } + if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg)) + return; + #elif HAVE_DOTTY sys::Path dotty(LLVM_PATH_DOTTY); @@ -185,16 +187,13 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back(dotty.c_str()); args.push_back(Filename.c_str()); args.push_back(0); - - errs() << "Running 'dotty' program... "; - if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) { - errs() << "Error: " << ErrMsg << "\n"; - } else { + // Dotty spawns another app and doesn't wait until it returns #if defined (__MINGW32__) || defined (_WINDOWS) - return; + wait = false; #endif - Filename.eraseFromDisk(); - } + errs() << "Running 'dotty' program... "; + if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg)) + return; #endif } diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 4b15587..911a03f 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -336,7 +336,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, // Error while reading. 
return error_code(errno, posix_category()); } - assert(NumRead != 0 && "fstat reported an invalid file size."); + if (NumRead == 0) { + assert(0 && "We got inaccurate FileSize value or fstat reported an " + "invalid file size."); + break; + } BytesLeft -= NumRead; BufPtr += NumRead; } diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 997ce0b..68d9c29 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/MathExtras.h" +#include <algorithm> #include <cstdlib> using namespace llvm; @@ -223,6 +224,56 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { NumTombstones = RHS.NumTombstones; } +void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) { + if (this == &RHS) return; + + // We can only avoid copying elements if neither set is small. + if (!this->isSmall() && !RHS.isSmall()) { + std::swap(this->CurArray, RHS.CurArray); + std::swap(this->CurArraySize, RHS.CurArraySize); + std::swap(this->NumElements, RHS.NumElements); + std::swap(this->NumTombstones, RHS.NumTombstones); + return; + } + + // FIXME: From here on we assume that both sets have the same small size. + + // If only RHS is small, copy the small elements into LHS and move the pointer + // from LHS to RHS. + if (!this->isSmall() && RHS.isSmall()) { + std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize, + this->SmallArray); + std::swap(this->NumElements, RHS.NumElements); + std::swap(this->CurArraySize, RHS.CurArraySize); + RHS.CurArray = this->CurArray; + RHS.NumTombstones = this->NumTombstones; + this->CurArray = this->SmallArray; + this->NumTombstones = 0; + return; + } + + // If only LHS is small, copy the small elements into RHS and move the pointer + // from RHS to LHS. + if (this->isSmall() && !RHS.isSmall()) { + std::copy(this->SmallArray, this->SmallArray+this->CurArraySize, + RHS.SmallArray); + std::swap(RHS.NumElements, this->NumElements); + std::swap(RHS.CurArraySize, this->CurArraySize); + this->CurArray = RHS.CurArray; + this->NumTombstones = RHS.NumTombstones; + RHS.CurArray = RHS.SmallArray; + RHS.NumTombstones = 0; + return; + } + + // Both a small, just swap the small elements. + assert(this->isSmall() && RHS.isSmall()); + assert(this->CurArraySize == RHS.CurArraySize); + std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize, + RHS.SmallArray); + std::swap(this->NumElements, RHS.NumElements); +} + SmallPtrSetImpl::~SmallPtrSetImpl() { if (!isSmall()) free(CurArray); diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 1c28bf8..abe570f 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -285,8 +285,8 @@ static unsigned GetAutoSenseRadix(StringRef &Str) { /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. -static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, - unsigned long long &Result) { +bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { // Autosense radix if not specified. 
if (Radix == 0) Radix = GetAutoSenseRadix(Str); @@ -326,17 +326,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, return false; } -bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { - return GetAsUnsignedInteger(*this, Radix, Result); -} - - -bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { +bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, + long long &Result) { unsigned long long ULLVal; // Handle positive strings first. - if (empty() || front() != '-') { - if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + if (Str.empty() || Str.front() != '-') { + if (getAsUnsignedInteger(Str, Radix, ULLVal) || // Check for value so large it overflows a signed value. (long long)ULLVal < 0) return true; @@ -345,7 +341,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { } // Get the positive part of the value. - if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || // Reject values so large they'd overflow as negative signed, but allow // "-0". This negates the unsigned so that the negative isn't undefined // on signed overflow. @@ -356,24 +352,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { return false; } -bool StringRef::getAsInteger(unsigned Radix, int &Result) const { - long long Val; - if (getAsInteger(Radix, Val) || - (int)Val != Val) - return true; - Result = Val; - return false; -} - -bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { - unsigned long long Val; - if (getAsInteger(Radix, Val) || - (unsigned)Val != Val) - return true; - Result = Val; - return false; -} - bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { StringRef Str = *this; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 94333a3..d261c53 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -29,6 +29,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case msp430: return "msp430"; case ppc64: return "powerpc64"; case ppc: return "powerpc"; + case r600: return "r600"; case sparc: return "sparc"; case sparcv9: return "sparcv9"; case tce: return "tce"; @@ -63,6 +64,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case hexagon: return "hexagon"; + case r600: return "r600"; + case sparcv9: case sparc: return "sparc"; @@ -145,6 +148,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("ppc32", ppc) .Case("ppc", ppc) .Case("mblaze", mblaze) + .Case("r600", r600) .Case("hexagon", hexagon) .Case("sparc", sparc) .Case("sparcv9", sparcv9) @@ -184,6 +188,7 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { // This is derived from the driver driver. 
.Cases("arm", "armv4t", "armv5", "armv6", Triple::arm) .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm) + .Case("r600", Triple::r600) .Case("ptx32", Triple::ptx32) .Case("ptx64", Triple::ptx64) .Case("amdil", Triple::amdil) @@ -206,6 +211,7 @@ const char *Triple::getArchNameForAssembler() { .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5") .Cases("armv6", "thumbv6", "armv6") .Cases("armv7", "thumbv7", "armv7") + .Case("r600", "r600") .Case("ptx32", "ptx32") .Case("ptx64", "ptx64") .Case("le32", "le32") @@ -234,6 +240,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("mipsel", "mipsallegrexel", Triple::mipsel) .Cases("mips64", "mips64eb", Triple::mips64) .Case("mips64el", Triple::mips64el) + .Case("r600", Triple::r600) .Case("hexagon", Triple::hexagon) .Case("sparc", Triple::sparc) .Case("sparcv9", Triple::sparcv9) @@ -641,6 +648,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mipsel: case llvm::Triple::ppc: case llvm::Triple::ptx32: + case llvm::Triple::r600: case llvm::Triple::sparc: case llvm::Triple::tce: case llvm::Triple::thumb: @@ -689,6 +697,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::mipsel: case Triple::ppc: case Triple::ptx32: + case Triple::r600: case Triple::sparc: case Triple::tce: case Triple::thumb: @@ -718,6 +727,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::le32: case Triple::mblaze: case Triple::msp430: + case Triple::r600: case Triple::tce: case Triple::thumb: case Triple::xcore: diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 12d1b1a..93eed24 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1726,12 +1726,6 @@ void Record::setName(Init *NewName) { } // Otherwise this isn't yet registered. Name = NewName; checkName(); - // Since the Init for the name was changed, see if we can resolve - // any of it using members of the Record. - Init *ComputedName = Name->resolveReferences(*this, 0); - if (ComputedName != Name) { - setName(ComputedName); - } // DO NOT resolve record values to the name at this point because // there might be default values for arguments of this def. Those // arguments might not have been resolved yet so we don't want to @@ -1754,6 +1748,8 @@ void Record::setName(const std::string &Name) { /// references. 
void Record::resolveReferencesTo(const RecordVal *RV) { for (unsigned i = 0, e = Values.size(); i != e; ++i) { + if (RV == &Values[i]) // Skip resolve the same field as the given one + continue; if (Init *V = Values[i].getValue()) Values[i].setValue(V->resolveReferences(*this, RV)); } diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index acb57f7..2a1e8e4 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -18,9 +18,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/DataTypes.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetMachine.h" -#include <cassert> namespace llvm { diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 4ec19cc..ca30716 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "asm-printer" -#include "ARM.h" #include "ARMAsmPrinter.h" +#include "ARM.h" #include "ARMBuildAttrs.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 75b796e..366e2fa 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -935,6 +935,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } } else llvm_unreachable("Unknown reg class!"); @@ -953,6 +955,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); } else llvm_unreachable("Unknown reg class!"); break; @@ -2756,24 +2760,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: - case ARM::VLD1DUPq8Pseudo: - case ARM::VLD1DUPq16Pseudo: - case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8PseudoWB_fixed: - case ARM::VLD1DUPq16PseudoWB_fixed: - case ARM::VLD1DUPq32PseudoWB_fixed: - case ARM::VLD1DUPq8PseudoWB_register: - case ARM::VLD1DUPq16PseudoWB_register: - case ARM::VLD1DUPq32PseudoWB_register: - case ARM::VLD2DUPd8Pseudo: - case ARM::VLD2DUPd16Pseudo: - case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8PseudoWB_fixed: - case ARM::VLD2DUPd16PseudoWB_fixed: - case ARM::VLD2DUPd32PseudoWB_fixed: - case ARM::VLD2DUPd8PseudoWB_register: - case ARM::VLD2DUPd16PseudoWB_register: - case ARM::VLD2DUPd32PseudoWB_register: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8wb_fixed: + case ARM::VLD2DUPd16wb_fixed: + case 
ARM::VLD2DUPd32wb_fixed: + case ARM::VLD2DUPd8wb_register: + case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_register: case ARM::VLD4DUPd8Pseudo: case ARM::VLD4DUPd16Pseudo: case ARM::VLD4DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index d2aff9a..291369f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMBaseRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" @@ -79,6 +79,7 @@ getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); + Reserved.set(ARM::FPSCR); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -492,8 +493,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the // emergency spill slot. - if (needsStackRealignment(MF) && (MFI->hasVarSizedObjects() || - !TFI->hasReservedCallFrame(MF))) + if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF)) return true; // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited @@ -517,7 +517,6 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { } bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // We can't realign the stack if: @@ -532,8 +531,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(FramePtr)) return false; - // We may also need a base pointer if there are dynamic allocas. - if (!MFI->hasVarSizedObjects()) + // We may also need a base pointer if there are dynamic allocas or stack + // pointer adjustments around calls. + if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) return true; if (!EnableBasePointer) return false; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 437b4c7..2b9c55d 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -15,13 +15,13 @@ #ifndef ARMCALLINGCONV_H #define ARMCALLINGCONV_H -#include "llvm/CallingConv.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "ARM.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { @@ -29,7 +29,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. 
if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -72,9 +72,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; - static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -118,8 +118,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; - static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; + static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c4ab99d..c2b7816 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -99,8 +99,8 @@ namespace { // Entries for NEON load/store information table. The table is sorted by // PseudoOpc for fast binary-search lookups. struct NEONLdStTableEntry { - unsigned PseudoOpc; - unsigned RealOpc; + uint16_t PseudoOpc; + uint16_t RealOpc; bool IsLoad; bool isUpdating; bool hasWritebackOperand; @@ -129,16 +129,6 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq16PseudoWB_fixed, ARM::VLD1DUPq16wb_fixed, true, true, true, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq16PseudoWB_register, ARM::VLD1DUPq16wb_register, true, true, true, SingleSpc, 2, 4,false}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq32PseudoWB_fixed, ARM::VLD1DUPq32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq32PseudoWB_register, ARM::VLD1DUPq32wb_register, true, true, true, SingleSpc, 2, 2,false}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, false, SingleSpc, 2, 8,false}, -{ ARM::VLD1DUPq8PseudoWB_fixed, ARM::VLD1DUPq8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, -{ ARM::VLD1DUPq8PseudoWB_register, ARM::VLD1DUPq8wb_register, true, true, true, SingleSpc, 2, 8,false}, - { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, @@ -149,16 +139,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, SingleSpc, 2, 4,false}, -{ ARM::VLD2DUPd16PseudoWB_fixed, ARM::VLD2DUPd16wb_fixed, true, true, false, SingleSpc, 2, 4,false}, -{ ARM::VLD2DUPd16PseudoWB_register, ARM::VLD2DUPd16wb_register, true, true, true, SingleSpc, 2, 4,false}, -{ 
ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd32PseudoWB_fixed, ARM::VLD2DUPd32wb_fixed, true, true, false, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd32PseudoWB_register, ARM::VLD2DUPd32wb_register, true, true, true, SingleSpc, 2, 2,false}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,false}, -{ ARM::VLD2DUPd8PseudoWB_fixed, ARM::VLD2DUPd8wb_fixed, true, true, false, SingleSpc, 2, 8,false}, -{ ARM::VLD2DUPd8PseudoWB_register, ARM::VLD2DUPd8wb_register, true, true, true, SingleSpc, 2, 8,false}, - { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, @@ -345,7 +325,7 @@ static const NEONLdStTableEntry NEONLdStTable[] = { /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON /// load or store pseudo instruction. static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { - unsigned NumEntries = array_lengthof(NEONLdStTable); + const unsigned NumEntries = array_lengthof(NEONLdStTable); #ifndef NDEBUG // Make sure the table is sorted. @@ -1090,24 +1070,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4q8oddPseudo_UPD: case ARM::VLD4q16oddPseudo_UPD: case ARM::VLD4q32oddPseudo_UPD: - case ARM::VLD1DUPq8Pseudo: - case ARM::VLD1DUPq16Pseudo: - case ARM::VLD1DUPq32Pseudo: - case ARM::VLD1DUPq8PseudoWB_fixed: - case ARM::VLD1DUPq16PseudoWB_fixed: - case ARM::VLD1DUPq32PseudoWB_fixed: - case ARM::VLD1DUPq8PseudoWB_register: - case ARM::VLD1DUPq16PseudoWB_register: - case ARM::VLD1DUPq32PseudoWB_register: - case ARM::VLD2DUPd8Pseudo: - case ARM::VLD2DUPd16Pseudo: - case ARM::VLD2DUPd32Pseudo: - case ARM::VLD2DUPd8PseudoWB_fixed: - case ARM::VLD2DUPd16PseudoWB_fixed: - case ARM::VLD2DUPd32PseudoWB_fixed: - case ARM::VLD2DUPd8PseudoWB_register: - case ARM::VLD2DUPd16PseudoWB_register: - case ARM::VLD2DUPd32PseudoWB_register: case ARM::VLD3DUPd8Pseudo: case ARM::VLD3DUPd16Pseudo: case ARM::VLD3DUPd32Pseudo: diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 818b202..a24eab4 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1384,7 +1384,10 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); - if (Imm < 0) { + // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather + // then a cmn, because there is no way to represent 2147483648 as a + // signed 32-bit int. + if (Imm < 0 && Imm != (int)0x80000000) { isNegativeImm = true; Imm = -Imm; } @@ -1475,7 +1478,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool ARMFastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast<CmpInst>(I); - Type *Ty = CI->getOperand(0)->getType(); // Get the compare predicate. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); @@ -1495,11 +1497,10 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); - bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); - unsigned CondReg = isFloat ? 
ARM::FPSCR : ARM::CPSR; + // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) - .addImm(ARMPred).addReg(CondReg); + .addImm(ARMPred).addReg(ARM::CPSR); UpdateValueMap(I, DestReg); return true; @@ -1851,6 +1852,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + // Check that we can handle all of the arguments. If we can't, then bail out + // now before we add code to the MBB. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // We don't handle NEON/vector parameters yet. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) + return false; + + // Now copy/store arg to correct locations. + if (VA.isRegLoc() && !VA.needsCustom()) { + continue; + } else if (VA.needsCustom()) { + // TODO: We need custom lowering for vector (v2f64) args. + if (VA.getLocVT() != MVT::f64 || + // TODO: Only handle register args for now. + !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) + return false; + } else { + switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) { + default: + return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + break; + case MVT::f32: + if (!Subtarget->hasVFP2()) + return false; + break; + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + break; + } + } + } + + // At the point, we are able to handle the call's arguments in fast isel. + // Get a count of how many bytes are to be pushed on the stack. NumBytes = CCInfo.getNextStackOffset(); @@ -1866,9 +1909,8 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; - // We don't handle NEON/vector parameters yet. - if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; + assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) && + "We don't handle NEON/vector parameters yet."); // Handle arg promotion, etc. switch (VA.getLocInfo()) { @@ -1908,12 +1950,13 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, RegArgs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { // TODO: We need custom lowering for vector (v2f64) args. - if (VA.getLocVT() != MVT::f64) return false; + assert(VA.getLocVT() == MVT::f64 && + "Custom lowering for v2f64 args not available"); CCValAssign &NextVA = ArgLocs[++i]; - // TODO: Only handle register args for now. - if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false; + assert(VA.isRegLoc() && NextVA.isRegLoc() && + "We only handle register args!"); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::VMOVRRD), VA.getLocReg()) @@ -1929,9 +1972,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, Addr.Base.Reg = ARM::SP; Addr.Offset = VA.getLocMemOffset(); - if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; + bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet; + assert(EmitRet && "Could not emit a store for argument!"); } } + return true; } @@ -2136,7 +2181,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); - if(isThumb2) + if (isThumb2) // Explicitly adding the predicate here. 
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0fd6025..bd4b2a9 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -501,7 +501,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, // SP can move around if there are allocas. We may also lose track of SP // when emergency spilling inside a non-reserved call frame setup. - bool hasMovingSP = MFI->hasVarSizedObjects() || !hasReservedCallFrame(MF); + bool hasMovingSP = !hasReservedCallFrame(MF); // When dynamically realigning the stack, use the frame pointer for // parameters, and the stack/base pointer for locals. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c99db98..ffb9acb 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1589,9 +1589,9 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; - case ARM::VLD2DUPd8PseudoWB_fixed: return ARM::VLD2DUPd8PseudoWB_register; - case ARM::VLD2DUPd16PseudoWB_fixed: return ARM::VLD2DUPd16PseudoWB_register; - case ARM::VLD2DUPd32PseudoWB_fixed: return ARM::VLD2DUPd32PseudoWB_register; + case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; + case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; + case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; } return Opc; // If not one we handle, return it unchanged. } @@ -2891,8 +2891,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP: { - unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, - ARM::VLD2DUPd32Pseudo }; + unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, + ARM::VLD2DUPd32 }; return SelectVLDDup(N, false, 2, Opcodes); } @@ -2909,9 +2909,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD2DUP_UPD: { - unsigned Opcodes[] = { ARM::VLD2DUPd8PseudoWB_fixed, - ARM::VLD2DUPd16PseudoWB_fixed, - ARM::VLD2DUPd32PseudoWB_fixed }; + unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, + ARM::VLD2DUPd32wb_fixed }; return SelectVLDDup(N, true, 2, Opcodes); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 477b5f4..e26dd22 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "arm-isel" +#include "ARMISelLowering.h" #include "ARM.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" -#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" #include "ARMRegisterInfo.h" @@ -49,7 +49,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <sstream> using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -87,7 +86,7 @@ namespace { } // The APCS parameter registers. 
-static const unsigned GPRArgRegs[] = { +static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -456,6 +455,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); } + setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -3673,6 +3674,27 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal == -1) + return SDValue(); + + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); +} + /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. @@ -5109,6 +5131,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); + case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); @@ -6842,33 +6865,63 @@ static SDValue PerformMULCombine(SDNode *N, if (!C) return SDValue(); - uint64_t MulAmt = C->getZExtValue(); + int64_t MulAmt = C->getSExtValue(); unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); DebugLoc DL = N->getDebugLoc(); SDValue Res; MulAmt >>= ShiftAmt; - if (isPowerOf2_32(MulAmt - 1)) { - // (mul x, 2^N + 1) => (add (shl x, N), x) - Res = DAG.getNode(ISD::ADD, DL, VT, - V, DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt-1), - MVT::i32))); - } else if (isPowerOf2_32(MulAmt + 1)) { - // (mul x, 2^N - 1) => (sub (shl x, N), x) - Res = DAG.getNode(ISD::SUB, DL, VT, - DAG.getNode(ISD::SHL, DL, VT, - V, DAG.getConstant(Log2_32(MulAmt+1), - MVT::i32)), - V); - } else - return SDValue(); + + if (MulAmt >= 0) { + if (isPowerOf2_32(MulAmt - 1)) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt - 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmt + 1)) { + // (mul x, 2^N - 1) => (sub (shl x, N), x) + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmt + 1), + MVT::i32)), + V); + } else + return SDValue(); + } else { + uint64_t MulAmtAbs = -MulAmt; + if (isPowerOf2_32(MulAmtAbs + 1)) { + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + Res = DAG.getNode(ISD::SUB, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + 
DAG.getConstant(Log2_32(MulAmtAbs + 1), + MVT::i32))); + } else if (isPowerOf2_32(MulAmtAbs - 1)) { + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + Res = DAG.getNode(ISD::ADD, DL, VT, + V, + DAG.getNode(ISD::SHL, DL, VT, + V, + DAG.getConstant(Log2_32(MulAmtAbs-1), + MVT::i32))); + Res = DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, MVT::i32),Res); + + } else + return SDValue(); + } if (ShiftAmt != 0) - Res = DAG.getNode(ISD::SHL, DL, VT, Res, - DAG.getConstant(ShiftAmt, MVT::i32)); + Res = DAG.getNode(ISD::SHL, DL, VT, + Res, DAG.getConstant(ShiftAmt, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7f12293..a71b74e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -15,6 +15,7 @@ #ifndef ARMISELLOWERING_H #define ARMISELLOWERING_H +#include "ARM.h" #include "ARMSubtarget.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -434,6 +435,8 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 7bedf30..72af535 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -14,11 +14,11 @@ #ifndef ARMINSTRUCTIONINFO_H #define ARMINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" +#include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "ARM.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 0b1406e..8196582 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -637,6 +637,7 @@ def BitfieldAsmOperand : AsmOperandClass { let Name = "Bitfield"; let ParserMethod = "parseBitfield"; } + def bf_inv_mask_imm : Operand<i32>, PatLeaf<(imm), [{ return ARM::isBitFieldInvertedMask(N->getZExtValue()); @@ -4084,74 +4085,43 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; -let isCodeGenOnly = 1 in { // Conditional instructions -multiclass AsI1_bincc_irs<bits<4> opcod, string opc, - InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - iii, opc, "\t$Rd, $Rn, $imm", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; - } - def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, - iir, opc, "\t$Rd, $Rn, $Rm", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; - } - - def rsi : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, - iis, opc, "\t$Rd, $Rn, $shift", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let 
Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-5} = shift{11-5}; - let Inst{4} = 0; - let Inst{3-0} = shift{3-0}; - } - - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, - iis, opc, "\t$Rd, $Rn, $shift", []>, - RegConstraint<"$Rn = $Rd"> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{19-16} = Rn; - let Inst{15-12} = Rd; - let Inst{11-8} = shift{11-8}; - let Inst{7} = 0; - let Inst{6-5} = shift{6-5}; - let Inst{4} = 1; - let Inst{3-0} = shift{3-0}; - } -} // AsI1_bincc_irs - -defm ANDCC : AsI1_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm ORRCC : AsI1_bincc_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -defm EORCC : AsI1_bincc_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, + Instruction irsr, + InstrItinClass iii, InstrItinClass iir, + InstrItinClass iis> { + def ri : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rr : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsi : ARMPseudoExpand<(outs GPR:$Rd), + (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; + def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + 4, iis, [], + (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; +} + +defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; -} // isCodeGenOnly } // neverHasSideEffects + //===----------------------------------------------------------------------===// // Atomic operations intrinsics // @@ -4605,10 +4575,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", + (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; @@ -4642,10 +4618,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", + (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : 
ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", + (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -5252,6 +5234,20 @@ def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", def : ARMInstAlias<"neg${s}${p} $Rd, $Rm", (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>; +// Pre-v6, 'mov r0, r0' was used as a NOP encoding. +def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>, + Requires<[IsARM, NoV6]>; + +// UMULL/SMULL are available on all arches, but the instruction definitions +// need difference constraints pre-v6. Use these aliases for the assembly +// parsing on pre-v6. +def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; +def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", + (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + Requires<[IsARM, NoV6]>; + // 'it' blocks in ARM mode just validate the predicates. The IT itself // is discarded. def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8684ce1..f61eb2b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -94,7 +94,7 @@ def VecListDPairAsmOperand : AsmOperandClass { let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListDPair : RegisterOperand<DPair, "printVectorListDPair"> { +def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> { let ParserMatchClass = VecListDPairAsmOperand; } // Register list of three sequential D registers. @@ -121,7 +121,7 @@ def VecListDPairSpacedAsmOperand : AsmOperandClass { let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListDPairSpaced"> { +def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> { let ParserMatchClass = VecListDPairSpacedAsmOperand; } // Register list of three D registers spaced by 2 (three Q registers). @@ -153,23 +153,24 @@ def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { let ParserMatchClass = VecListOneDAllLanesAsmOperand; } // Register list of two D registers, with "all lanes" subscripting. -def VecListTwoDAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListTwoDAllLanes"; +def VecListDPairAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { - let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +def VecListDPairAllLanes : RegisterOperand<DPair, + "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListDPairAllLanesAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). 
-def VecListTwoQAllLanesAsmOperand : AsmOperandClass { - let Name = "VecListTwoQAllLanes"; +def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListDPairSpacedAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } -def VecListTwoQAllLanes : RegisterOperand<DPR, +def VecListDPairSpacedAllLanes : RegisterOperand<DPair, "printVectorListTwoSpacedAllLanes"> { - let ParserMatchClass = VecListTwoQAllLanesAsmOperand; + let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand; } // Register list of three D registers, with "all lanes" subscripting. def VecListThreeDAllLanesAsmOperand : AsmOperandClass { @@ -1276,39 +1277,32 @@ class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> { - let Pattern = [(set QPR:$dst, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; -} - def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; -def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>; -def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>; -def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>; - def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), - (VLD1DUPq32Pseudo addrmode6:$addr)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { - -class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), +class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", + [(set VecListDPairAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), + (VLD1DUPq32 addrmode6:$addr)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ -1333,7 +1327,7 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { } multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { @@ -1343,7 +1337,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, - (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1361,13 +1355,6 @@ defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; defm VLD1DUPq16wb : 
VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; -def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; -def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; -def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; - // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), @@ -1378,18 +1365,14 @@ class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy> let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>; -def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>; - -// ...with double-spaced registers (not used for codegen): -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; +// ...with double-spaced registers +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // ...with address register writeback: multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { @@ -1414,20 +1397,13 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; - -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; -def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; -def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; -def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo <IIC_VLD2dupu>; -def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2dupu>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP<bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e8984e1..1f7edc1 
100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -574,7 +574,7 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, cc_out:$s)>; // and with the optional destination operand, too. - def : t2InstAlias<!strconcat(opc, "${s}${p}.ri", " $Rdn, $imm"), + def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"), (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; @@ -2952,45 +2952,36 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, i32imm:$imm), IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>, RegConstraint<"$false = $Rd">; +} // isCodeGenOnly = 1 -multiclass T2I_bincc_irs<bits<4> opcod, string opc, +multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { // shifted imm - def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), - iii, opc, ".w\t$Rd, $Rn, $imm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{15} = 0; - } + def ri : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + 4, iii, [], + (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), - iir, opc, ".w\t$Rd, $Rn, $Rm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type - } + def rr : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + 4, iir, [], + (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; // shifted register - def rs : T2sTwoRegShiftedReg<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - iis, opc, ".w\t$Rd, $Rn, $ShiftedRm", []>, - RegConstraint<"$Rn = $Rd"> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - } + def rs : t2PseudoExpand<(outs rGPR:$Rd), + (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + 4, iis, [], + (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rn = $Rd">; } // T2I_bincc_irs -defm t2ANDCC : T2I_bincc_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2ORRCC : T2I_bincc_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; -defm t2EORCC : T2I_bincc_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi>; - -} // isCodeGenOnly = 1 +defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; +defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs, + IIC_iBITi, IIC_iBITr, IIC_iBITsi>; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3768,20 +3759,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0, (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>; +def : 
t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, 0)>; /* from coprocessor to ARM core register */ def t2MRC : t2MovRCopro<0b1110, "mrc", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1, (outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; +def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", + (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + c_imm:$CRm, 0)>; def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index aa10af7..e9d5720 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -206,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>, Requires<[HasVFP2]>; def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>, Requires<[HasVFP2]>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpush${p}", "$r", + (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>; +defm : VFPDTAnyInstAlias<"vpop${p}", "$r", + (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>; // FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores @@ -286,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; // These are encoded as unary instructions. -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$Dd, DPR:$Dm), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm", @@ -315,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// // FP Unary Operations. @@ -335,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } -let Defs = [FPSCR] in { +let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0", @@ -376,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } -} // Defs = [FPSCR] +} // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$Dd), (ins SPR:$Sm), @@ -810,7 +818,29 @@ let Constraints = "$a = $dst" in { // FP to Fixed-Point: -def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, +// Single Precision register +class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{0}; + let Inst{15-12} = dst{4-1}; +} + +// Double Precision register +class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> { + bits<5> dst; + // if dp_operation then UInt(D:Vd) else UInt(Vd:D); + let Inst{22} = dst{4}; + let Inst{15-12} = dst{3-0}; +} + +def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -818,7 +848,7 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, let D = VFPNeonA8Domain; } -def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, +def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -826,7 +856,7 @@ def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, let D = VFPNeonA8Domain; } -def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, +def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -834,7 +864,7 @@ def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, let D = VFPNeonA8Domain; } -def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, +def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -842,25 +872,25 @@ def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, let D = VFPNeonA8Domain; } -def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, +def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>; -def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, +def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>; -def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, +def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>; -def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, +def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, 
fbits32:$fbits), IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>; // Fixed-Point to FP: -def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, +def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -868,7 +898,7 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, let D = VFPNeonA8Domain; } -def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, +def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -876,7 +906,7 @@ def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, let D = VFPNeonA8Domain; } -def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, +def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -884,7 +914,7 @@ def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, let D = VFPNeonA8Domain; } -def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, +def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> { // Some single precision VFP instructions may be executed on both NEON and @@ -892,19 +922,19 @@ def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, let D = VFPNeonA8Domain; } -def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, +def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>; -def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, +def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0, (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits), IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>; -def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, +def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>; -def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, +def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1, (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits), IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>; @@ -1166,9 +1196,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, // APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags // to APSR. 
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in +let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), - "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>; + "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>; // Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR] in @@ -1182,6 +1212,10 @@ let Uses = [FPSCR] in { "vmrs", "\t$Rt, fpexc", []>; def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins), "vmrs", "\t$Rt, fpsid", []>; + def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr0", []>; + def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), + "vmrs", "\t$Rt, mvfr1", []>; } //===----------------------------------------------------------------------===// @@ -1304,6 +1338,13 @@ def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">; def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">; def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">; def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">; +def : VFP2MnemonicAlias<"fmrx", "vmrs">; +def : VFP2MnemonicAlias<"fmxr", "vmsr">; + +// Be friendly and accept the old form of zero-compare +def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>; +def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>; + def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm", diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index afbe0e4..753e578 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -62,7 +62,7 @@ extern "C" { // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. // pass it as the argument to the C part of the callback @@ -86,7 +86,7 @@ extern "C" { // #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. 
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" + "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // // We need to exchange the values in slots 0 and 1 so we can diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 1f83762..6f3819a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "ARMRegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" using namespace llvm; void ARMRegisterInfo::anchor() { } diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 65ed95d..8a24842 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -15,13 +15,12 @@ #define ARMREGISTERINFO_H #include "ARM.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "ARMBaseRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct ARMRegisterInfo : public ARMBaseRegisterInfo { virtual void anchor(); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b16a12c..1327fb8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -153,14 +153,21 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; -def APSR : ARMReg<1, "apsr">; -def SPSR : ARMReg<2, "spsr">; -def FPSCR : ARMReg<3, "fpscr">; -def ITSTATE : ARMReg<4, "itstate">; +// We model fpscr with two registers: FPSCR models the control bits and will be +// reserved. FPSCR_NZCV models the flag bits and will be unreserved. +def CPSR : ARMReg<0, "cpsr">; +def APSR : ARMReg<1, "apsr">; +def SPSR : ARMReg<2, "spsr">; +def FPSCR : ARMReg<3, "fpscr">; +def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { + let Aliases = [FPSCR]; +} +def ITSTATE : ARMReg<4, "itstate">; // Special Registers - only available in privileged mode. def FPSID : ARMReg<0, "fpsid">; +def MVFR1 : ARMReg<6, "mvfr1">; +def MVFR0 : ARMReg<7, "mvfr0">; def FPEXC : ARMReg<8, "fpexc">; // Register classes. @@ -304,7 +311,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], // Register class representing a pair of consecutive D registers. // Use the Q registers for the even-odd pairs. -def DPair : RegisterClass<"ARM", [v2i64], 128, (interleave QPR, TuplesOE2D)> { +def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (interleave QPR, TuplesOE2D)> { // Allocate starting at non-VFP2 registers D16-D31 first. 
let AltOrders = [(rotl DPair, 16)]; let AltOrderSelect = [{ return 1; }]; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 2045482..911eb13 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1101,13 +1101,8 @@ public: return VectorList.Count == 4; } - bool isVecListTwoQ() const { - if (!isDoubleSpacedVectorList()) return false; - return VectorList.Count == 2; - } - bool isVecListDPairSpaced() const { - if (!isSingleSpacedVectorList()) return false; + if (isSingleSpacedVectorList()) return false; return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID] .contains(VectorList.RegNum)); } @@ -1133,12 +1128,13 @@ public: return VectorList.Count == 1; } - bool isVecListTwoDAllLanes() const { + bool isVecListDPairAllLanes() const { if (!isSingleSpacedVectorAllLanes()) return false; - return VectorList.Count == 2; + return (ARMMCRegisterClasses[ARM::DPairRegClassID] + .contains(VectorList.RegNum)); } - bool isVecListTwoQAllLanes() const { + bool isVecListDPairSpacedAllLanes() const { if (!isDoubleSpacedVectorAllLanes()) return false; return VectorList.Count == 2; } @@ -2858,8 +2854,12 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) - return Error(RegLoc, "register list not in ascending order"); + if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) { + if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + Warning(RegLoc, "register list not in ascending order"); + else + return Error(RegLoc, "register list not in ascending order"); + } if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { Warning(RegLoc, "duplicated register (" + RegTok.getString() + ") in register list"); @@ -2905,6 +2905,12 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) { Parser.Lex(); // Eat the ']'. return MatchOperand_Success; } + + // There's an optional '#' token here. Normally there wouldn't be, but + // inline assemble puts one in, and it's friendly to accept that. + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat the '#' + const MCExpr *LaneIndex; SMLoc Loc = Parser.getTok().getLoc(); if (getParser().ParseExpression(LaneIndex)) { @@ -2981,12 +2987,13 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { case NoLanes: E = Parser.getTok().getLoc(); Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, - &ARMMCRegisterClasses[ARM::DPairRegClassID]); - + &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E)); break; case AllLanes: E = Parser.getTok().getLoc(); + Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0, + &ARMMCRegisterClasses[ARM::DPairRegClassID]); Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false, S, E)); break; @@ -3152,7 +3159,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (LaneKind) { case NoLanes: - // Non-lane two-register operands have been converted to the + // Two-register operands have been converted to the // composite register classes. if (Count == 2) { const MCRegisterClass *RC = (Spacing == 1) ? 
@@ -3165,6 +3172,14 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { (Spacing == 2), S, E)); break; case AllLanes: + // Two-register operands have been converted to the + // composite register classes. + if (Count == 2) { + const MCRegisterClass *RC = (Spacing == 1) ? + &ARMMCRegisterClasses[ARM::DPairRegClassID] : + &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; + FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); + } Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count, (Spacing == 2), S, E)); @@ -3253,7 +3268,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (isMClass()) { // See ARMv6-M 10.1.1 - unsigned FlagsVal = StringSwitch<unsigned>(Mask) + std::string Name = Mask.lower(); + unsigned FlagsVal = StringSwitch<unsigned>(Name) .Case("apsr", 0) .Case("iapsr", 1) .Case("eapsr", 2) @@ -4427,10 +4443,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, else if (Res == -1) // irrecoverable error return true; // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") { + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { S = Parser.getTok().getLoc(); Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S)); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); return false; } @@ -4598,7 +4615,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" || Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || - Mnemonic == "fmuls" || Mnemonic == "fcmps" || + Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4101f59..ce4587b 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -849,7 +849,7 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -static const unsigned GPRDecoderTable[] = { +static const uint16_t GPRDecoderTable[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, ARM::R10, ARM::R11, @@ -869,8 +869,14 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo == 15) return MCDisassembler::Fail; - return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); + DecodeStatus S = MCDisassembler::Success; + + if (RegNo == 15) + S = MCDisassembler::SoftFail; + + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); + + return S; } static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, @@ -916,7 +922,7 @@ static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned SPRDecoderTable[] = { +static const uint16_t SPRDecoderTable[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, ARM::S4, ARM::S5, ARM::S6, ARM::S7, ARM::S8, ARM::S9, ARM::S10, ARM::S11, @@ -937,7 +943,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned 
RegNo, return MCDisassembler::Success; } -static const unsigned DPRDecoderTable[] = { +static const uint16_t DPRDecoderTable[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, ARM::D4, ARM::D5, ARM::D6, ARM::D7, ARM::D8, ARM::D9, ARM::D10, ARM::D11, @@ -973,7 +979,7 @@ DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder); } -static const unsigned QPRDecoderTable[] = { +static const uint16_t QPRDecoderTable[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11, @@ -992,7 +998,7 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static const unsigned DPairDecoderTable[] = { +static const uint16_t DPairDecoderTable[] = { ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6, ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12, ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18, @@ -1011,7 +1017,7 @@ static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static const unsigned DPairSpacedDecoderTable[] = { +static const uint16_t DPairSpacedDecoderTable[] = { ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5, ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9, ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13, @@ -2001,27 +2007,15 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // First output register switch (Inst.getOpcode()) { - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD1q8: - case ARM::VLD1q16wb_fixed: - case ARM::VLD1q16wb_register: - case ARM::VLD1q32wb_fixed: - case ARM::VLD1q32wb_register: - case ARM::VLD1q64wb_fixed: - case ARM::VLD1q64wb_register: - case ARM::VLD1q8wb_fixed: - case ARM::VLD1q8wb_register: - case ARM::VLD2d16: - case ARM::VLD2d32: - case ARM::VLD2d8: - case ARM::VLD2d16wb_fixed: - case ARM::VLD2d16wb_register: - case ARM::VLD2d32wb_fixed: - case ARM::VLD2d32wb_register: - case ARM::VLD2d8wb_fixed: - case ARM::VLD2d8wb_register: + case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8: + case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register: + case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register: + case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8: + case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register: + case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register: if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; break; @@ -2325,6 +2319,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VST3d8_UPD: case ARM::VST3d16_UPD: case ARM::VST3d32_UPD: @@ -2366,6 +2362,23 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16wb_fixed: case ARM::VST1q32wb_fixed: case ARM::VST1q64wb_fixed: + case ARM::VST1d8Twb_fixed: + case ARM::VST1d16Twb_fixed: + case ARM::VST1d32Twb_fixed: + case ARM::VST1d64Twb_fixed: + case ARM::VST1d8Qwb_fixed: + case ARM::VST1d16Qwb_fixed: + case ARM::VST1d32Qwb_fixed: + case ARM::VST1d64Qwb_fixed: + case ARM::VST2d8wb_fixed: + case 
ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2b8wb_fixed: + case ARM::VST2b16wb_fixed: + case ARM::VST2b32wb_fixed: break; } @@ -2525,8 +2538,19 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, align *= (1 << size); - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register: + case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + } if (Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2556,18 +2580,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; - if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder))) - return MCDisassembler::Fail; - if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + switch (Inst.getOpcode()) { + case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register: + case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register: + if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2: + case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register: + case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register: + case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register: + if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder))) + return MCDisassembler::Fail; + break; + default: + if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; + break; } + if (Rm != 0xF) + Inst.addOperand(MCOperand::CreateImm(0)); + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); @@ -2579,6 +2618,9 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + return S; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index bae4e78..2b994df 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1026,15 +1026,6 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, } void ARMInstPrinter::printVectorListTwo(const MCInst *MI, 
unsigned OpNum, - raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; -} - -void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); @@ -1042,9 +1033,9 @@ void ARMInstPrinter::printVectorListDPair(const MCInst *MI, unsigned OpNum, O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}"; } -void ARMInstPrinter::printVectorListDPairSpaced(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { +void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, + unsigned OpNum, + raw_ostream &O) { unsigned Reg = MI->getOperand(OpNum).getReg(); unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); @@ -1081,11 +1072,10 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI, void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[]}"; + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1); + O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; } void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI, @@ -1111,23 +1101,13 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI, << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}"; } -void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. - O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; -} - void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - // Normally, it's not safe to use register enum values directly with - // addition to get the next register, but for VFP registers, the - // sort order is guaranteed because they're all of the form D<n>. 
- O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], " - << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}"; + unsigned Reg = MI->getOperand(OpNum).getReg(); + unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0); + unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2); + O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}"; } void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 1037161..e9cd407 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -134,9 +134,8 @@ public: void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListDPair(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListDPairSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); + void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum, @@ -147,8 +146,6 @@ public: raw_ostream &O); void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum, - raw_ostream &O); void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index d3a3d3a..25849ee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -167,6 +167,7 @@ static unsigned getRelaxedOpcode(unsigned Op) { case ARM::tBcc: return ARM::t2Bcc; case ARM::tLDRpciASM: return ARM::t2LDRpci; case ARM::tADR: return ARM::t2ADR; + case ARM::tB: return ARM::t2B; } } @@ -181,6 +182,16 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { + case ARM::fixup_arm_thumb_br: { + // Relaxing tB to t2B. tB has a signed 12-bit displacement with the + // low bit being an implied zero. There's an implied +4 offset for the + // branch, so we adjust the other way here to determine what's + // encodable. + // + // Relax if the value is too big for a (signed) i8. + int64_t Offset = int64_t(Value) - 4; + return Offset > 2046 || Offset < -2048; + } case ARM::fixup_arm_thumb_bcc: { // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the // low bit being an implied zero. There's an implied +4 offset for the diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 06eb4e5..ae11be8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -187,21 +187,37 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { case S31: case D31: return 31; // Composite registers use the regnum of the first register in the list. 
- case D1_D2: return 1; - case D3_D5: return 3; - case D5_D7: return 5; - case D7_D9: return 7; - case D9_D10: return 9; - case D11_D12: return 11; - case D13_D14: return 13; - case D15_D16: return 15; - case D17_D18: return 17; - case D19_D20: return 19; - case D21_D22: return 21; - case D23_D24: return 23; - case D25_D26: return 25; - case D27_D28: return 27; - case D29_D30: return 29; + /* Q0 */ case D0_D2: return 0; + case D1_D2: case D1_D3: return 1; + /* Q1 */ case D2_D4: return 2; + case D3_D4: case D3_D5: return 3; + /* Q2 */ case D4_D6: return 4; + case D5_D6: case D5_D7: return 5; + /* Q3 */ case D6_D8: return 6; + case D7_D8: case D7_D9: return 7; + /* Q4 */ case D8_D10: return 8; + case D9_D10: case D9_D11: return 9; + /* Q5 */ case D10_D12: return 10; + case D11_D12: case D11_D13: return 11; + /* Q6 */ case D12_D14: return 12; + case D13_D14: case D13_D15: return 13; + /* Q7 */ case D14_D16: return 14; + case D15_D16: case D15_D17: return 15; + /* Q8 */ case D16_D18: return 16; + case D17_D18: case D17_D19: return 17; + /* Q9 */ case D18_D20: return 18; + case D19_D20: case D19_D21: return 19; + /* Q10 */ case D20_D22: return 20; + case D21_D22: case D21_D23: return 21; + /* Q11 */ case D22_D24: return 22; + case D23_D24: case D23_D25: return 23; + /* Q12 */ case D24_D26: return 24; + case D25_D26: case D25_D27: return 25; + /* Q13 */ case D26_D28: return 26; + case D27_D28: case D27_D29: return 27; + /* Q14 */ case D28_D30: return 28; + case D29_D30: case D29_D31: return 29; + /* Q15 */ } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 1606b92..ed27f9f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -151,13 +151,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, Triple TheTriple(TT); if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + return createMachOStreamer(Ctx, MAB, OS, Emitter, false); if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); } - return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); + return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index faf73ac..9d3da14 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -34,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { MCValue Target, unsigned Log2Size, uint64_t &FixedValue); - void RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); + void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue); public: ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, @@ -102,34 +102,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, Log2Size = llvm::Log2_32(4); return true; + // For movw/movt r_type relocations they always have a pair following them and + // the r_length bits are used differently. 
The encoding of the r_length is as + // follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 1; + return true; case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - RelocType = unsigned(macho::RIT_ARM_HalfDifference); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 3; return true; case ARM::fixup_arm_movw_lo16: case ARM::fixup_arm_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + Log2Size = 0; + return true; case ARM::fixup_t2_movw_lo16: case ARM::fixup_t2_movw_lo16_pcrel: RelocType = unsigned(macho::RIT_ARM_Half); - // Report as 'long', even though that is not quite accurate. - Log2Size = llvm::Log2_32(4); + Log2Size = 2; return true; } } void ARMMachObjectWriter:: -RecordARMMovwMovtRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, - uint64_t &FixedValue) { +RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Type = macho::RIT_ARM_Half; @@ -313,10 +326,9 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // scattered relocation entry. Differences always require scattered // relocations. if (Target.getSymB()) { - if (RelocType == macho::RIT_ARM_Half || - RelocType == macho::RIT_ARM_HalfDifference) - return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, FixedValue); + if (RelocType == macho::RIT_ARM_Half) + return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, + Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, Log2Size, FixedValue); } @@ -391,6 +403,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, (Log2Size << 25) | (IsExtern << 27) | (Type << 28)); + + // Even when it's not a scattered relocation, movw/movt always uses + // a PAIR relocation. + if (Type == macho::RIT_ARM_Half) { + // The other-half value only gets populated for the movt relocation. 
+ uint32_t Value = 0; + switch ((unsigned)Fixup.getKind()) { + default: break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Value = FixedValue; + break; + } + macho::RelocationEntry MREPair; + MREPair.Word0 = Value; + MREPair.Word1 = ((0xffffff) | + (Log2Size << 25) | + (macho::RIT_Pair << 28)); + + Writer->addRelocation(Fragment->getParent(), MREPair); + } + Writer->addRelocation(Fragment->getParent(), MRE); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index a89a663..edd73c2 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Thumb1FrameLowering.h" -#include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index adaccdd..8cf7cac 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -13,7 +13,6 @@ #include "Thumb1InstrInfo.h" #include "ARM.h" -#include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 4d97626..27fce9b 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -14,10 +14,10 @@ #ifndef THUMB1INSTRUCTIONINFO_H #define THUMB1INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 6b8bf0e..ef77bbd 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -12,12 +12,11 @@ // //===----------------------------------------------------------------------===// +#include "Thumb1RegisterInfo.h" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" -#include "Thumb1InstrInfo.h" -#include "Thumb1RegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 9060e59..6971842 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -16,13 +16,12 @@ #define THUMB1REGISTERINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 6cb182a..2fe4b85 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" -#include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff 
--git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index a754649..1ae2ef1 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -14,10 +14,10 @@ #ifndef THUMB2INSTRUCTIONINFO_H #define THUMB2INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "ARM.h" #include "ARMInstrInfo.h" #include "Thumb2RegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 6d210fe..29a87d0 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "Thumb2RegisterInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" -#include "Thumb2InstrInfo.h" -#include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 824378a..6b397e8 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -16,13 +16,12 @@ #define THUMB2REGISTERINFO_H #include "ARM.h" -#include "ARMRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; class ARMBaseInstrInfo; - class Type; struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { public: diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 5ee5f42..fb9d93b 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -39,9 +39,9 @@ namespace { /// ReduceTable - A static table with information on mapping from wide /// opcodes to narrow struct ReduceEntry { - unsigned WideOpc; // Wide opcode - unsigned NarrowOpc1; // Narrow opcode to transform to - unsigned NarrowOpc2; // Narrow opcode when it's two-address + uint16_t WideOpc; // Wide opcode + uint16_t NarrowOpc1; // Narrow opcode to transform to + uint16_t NarrowOpc2; // Narrow opcode when it's two-address uint8_t Imm1Limit; // Limit of immediate field (bits) uint8_t Imm2Limit; // Limit of immediate field when it's two-address unsigned LowRegs1 : 1; // Only possible if low-registers are used @@ -189,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) { } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs) + for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 80973b7..b6b209e 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2392,17 +2392,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) { printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2); Out << ";\n"; - unsigned NumCases = SI.getNumCases(); // Skip the first item since that's the default case. 
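The Thumb2SizeReduction hunk above narrows the three opcode fields of the static ReduceTable from unsigned to uint16_t (target opcodes comfortably fit in 16 bits), and HasImplicitCPSRDef now walks the implicit-def list through getImplicitDefs(), whose entries are uint16_t as well. A standalone illustration of what the narrowing buys for a static table; the struct names are made up:

    #include <cstdint>
    #include <cstdio>

    struct WideEntry   { unsigned WideOpc, NarrowOpc1, NarrowOpc2; uint8_t Imm1Limit, Imm2Limit; };
    struct PackedEntry { uint16_t WideOpc, NarrowOpc1, NarrowOpc2; uint8_t Imm1Limit, Imm2Limit; };

    int main() {
      // Each table row shrinks roughly by half once the opcode fields are 16-bit.
      std::printf("%u -> %u bytes per entry\n",
                  (unsigned)sizeof(WideEntry), (unsigned)sizeof(PackedEntry));
    }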
- for (unsigned i = 0; i < NumCases; ++i) { - ConstantInt* CaseVal = SI.getCaseValue(i); - BasicBlock* Succ = SI.getCaseSuccessor(i); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); + BasicBlock* Succ = i.getCaseSuccessor(); Out << " case "; writeOperand(CaseVal); Out << ":\n"; printPHICopiesForSuccessor (SI.getParent(), Succ, 2); printBranchToBlock(SI.getParent(), Succ, 2); - if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent()))) + if (Function::iterator(Succ) == + llvm::next(Function::iterator(SI.getParent()))) Out << " break;\n"; } diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp index 916f9ba..fac806e 100644 --- a/lib/Target/CellSPU/SPUFrameLowering.cpp +++ b/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "SPU.h" #include "SPUFrameLowering.h" +#include "SPU.h" #include "SPUInstrBuilder.h" #include "SPUInstrInfo.h" #include "llvm/Function.h" diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 3d2b32d..55b3f72 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -31,14 +31,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include <map> using namespace llvm; -// Used in getTargetNodeName() below namespace { - std::map<unsigned, const char *> node_names; - // Byte offset of the preferred slot (counted from the MSB) int prefslotOffset(EVT VT) { int retval=0; @@ -481,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setSchedulingPreference(Sched::RegPressure); } -const char * -SPUTargetLowering::getTargetNodeName(unsigned Opcode) const -{ - if (node_names.empty()) { - node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; - node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; - node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; - node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; - node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; - node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; - node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; - node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; - node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; - node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; - node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; - node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; - node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; - node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS"; - node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES"; - node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; - node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; - node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = - "SPUISD::ROTBYTES_LEFT_BITS"; - node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; - node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; - node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; - node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; - node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; - } - - std::map<unsigned, const char 
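The C backend hunk replaces numeric case indexing, where index 0 silently meant the first non-default case, with SwitchInst's case iterators, so the default case never has to be skipped by hand. A condensed sketch of the same idiom against the 3.1-era headers used in this tree; the helper itself is illustrative, not part of the patch:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Count how many explicit cases of a switch branch to a given block,
    // walking the cases through SwitchInst::CaseIt instead of by index.
    static unsigned countCasesTargeting(SwitchInst &SI, BasicBlock *Dest) {
      unsigned N = 0;
      for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i)
        if (i.getCaseSuccessor() == Dest)
          ++N;
      return N;
    }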
*>::iterator i = node_names.find(Opcode); - - return ((i != node_names.end()) ? i->second : 0); +const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; + case SPUISD::Hi: return "SPUISD::Hi"; + case SPUISD::Lo: return "SPUISD::Lo"; + case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; + case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; + case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; + case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; + case SPUISD::CALL: return "SPUISD::CALL"; + case SPUISD::SHUFB: return "SPUISD::SHUFB"; + case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; + case SPUISD::CNTB: return "SPUISD::CNTB"; + case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; + case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; + case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; + case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; + case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; + case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; + case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; + case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; + case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; + case SPUISD::SELB: return "SPUISD::SELB"; + case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER"; + case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; + case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; + } } //===----------------------------------------------------------------------===// @@ -1216,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { // FIXME: we should be able to query the argument registers from // tablegen generated code. - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, @@ -1230,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 }; // size of ArgRegs array - unsigned NumArgRegs = 77; + const unsigned NumArgRegs = 77; // We will spill (79-3)+1 registers to the stack SmallVector<SDValue, 79-3+1> MemOps; diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index e28e2a4..25c5355 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -15,9 +15,9 @@ #ifndef SPU_ISELLOWERING_H #define SPU_ISELLOWERING_H +#include "SPU.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "SPU.h" namespace llvm { namespace SPUISD { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index f0d21ad..85e5821 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -15,8 +15,8 @@ #define SPU_INSTRUCTIONINFO_H #include "SPU.h" -#include "llvm/Target/TargetInstrInfo.h" #include "SPURegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "SPUGenInstrInfo.inc" diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 92983e1..1b2da5f 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// 
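The SPU getTargetNodeName rewrite above drops the lazily populated file-static std::map in favour of a plain switch: no global object, no first-call population, and the compiler is free to emit a jump table. The shape of that change reduced to a toy example; the enum and names here are invented:

    #include <cstdio>

    enum Opcode { RET_FLAG, CALL, SHUFB };

    // Opcode-to-name lookup as a switch: unknown opcodes fall through to 0,
    // matching the behaviour of the old map lookup that missed.
    static const char *nodeName(unsigned Opc) {
      switch (Opc) {
      default:       return 0;
      case RET_FLAG: return "RET_FLAG";
      case CALL:     return "CALL";
      case SHUFB:    return "SHUFB";
      }
    }

    int main() { std::printf("%s\n", nodeName(CALL)); }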
#define DEBUG_TYPE "reginfo" -#include "SPU.h" #include "SPURegisterInfo.h" +#include "SPU.h" #include "SPUInstrBuilder.h" #include "SPUSubtarget.h" #include "SPUMachineFunction.h" diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index e43f5ad..21f6b25 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "SPU.h" #include "SPUTargetMachine.h" +#include "SPU.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Support/DynamicLibrary.h" diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index c179292..3e5d38c 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -23,9 +23,6 @@ #include "llvm/Target/TargetData.h" namespace llvm { -class PassManager; -class GlobalValue; -class TargetFrameLowering; /// SPUTargetMachine /// diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 76b5e9c..107c6cc 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1090,10 +1090,10 @@ void CppWriter::printInstruction(const Instruction *I, << getOpName(SI->getDefaultDest()) << ", " << SI->getNumCases() << ", " << bbname << ");"; nl(Out); - unsigned NumCases = SI->getNumCases(); - for (unsigned i = 0; i < NumCases; ++i) { - const ConstantInt* CaseVal = SI->getCaseValue(i); - const BasicBlock *BB = SI->getCaseSuccessor(i); + for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + const ConstantInt* CaseVal = i.getCaseValue(); + const BasicBlock *BB = i.getCaseSuccessor(); Out << iName << "->addCase(" << getOpName(CaseVal) << ", " << getOpName(BB) << ");"; diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index bbefcaf..270c7a7 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -15,7 +15,6 @@ #ifndef TARGET_Hexagon_H #define TARGET_Hexagon_H -#include <cassert> #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 688b8e3..bf333b7 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -32,11 +32,11 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -46,8 +46,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp index 71787de..46c20e9 100644 --- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp +++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "HexagonCallingConvLower.h" +#include "Hexagon.h" 
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "Hexagon.h" using namespace llvm; Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg, diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 49c6cdf..e8a6924 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -8,13 +8,13 @@ // //===----------------------------------------------------------------------===// +#include "HexagonFrameLowering.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" -#include "HexagonFrameLowering.h" #include "llvm/Function.h" #include "llvm/Type.h" #include "llvm/ADT/BitVector.h" diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 04ea4ed..57772a5 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -27,6 +27,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hwloops" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -43,8 +45,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> -#include "Hexagon.h" -#include "HexagonTargetMachine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index ed4b840..d6da0d0 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -28,17 +28,16 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "HexagonMachineFunctionInfo.h" #include "llvm/Support/CommandLine.h" +using namespace llvm; const unsigned Hexagon_MAX_RET_SIZE = 64; -using namespace llvm; static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, @@ -159,7 +158,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const unsigned RegList[] = { + static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 }; @@ -182,10 +181,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, return false; } - static const unsigned RegList1[] = { + static const uint16_t RegList1[] = { Hexagon::D1, Hexagon::D2 }; - static const unsigned RegList2[] = { + static const uint16_t RegList2[] = { Hexagon::R1, Hexagon::R3 }; if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 5396486..4208bcb 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ 
-15,10 +15,10 @@ #ifndef Hexagon_ISELLOWERING_H #define Hexagon_ISELLOWERING_H +#include "Hexagon.h" #include "llvm/Target/TargetLowering.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" -#include "Hexagon.h" namespace llvm { namespace HexagonISD { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 07872d4..3d7ace5 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "HexagonRegisterInfo.h" #include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "Hexagon.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index eb088c3..7306870 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -14,10 +14,10 @@ #ifndef HexagonINSTRUCTIONINFO_H #define HexagonINSTRUCTIONINFO_H +#include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" -#include "HexagonRegisterInfo.h" #define GET_INSTRINFO_HEADER diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 06c732f..55cbc09 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -36,6 +36,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagon-peephole" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" #include "llvm/Constants.h" #include "llvm/PassSupport.h" #include "llvm/ADT/DenseMap.h" @@ -45,16 +47,13 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include <algorithm> -#include "Hexagon.h" -#include "HexagonTargetMachine.h" - -#include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index c481270..2a9de92 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "Hexagon.h" #include "HexagonRegisterInfo.h" +#include "Hexagon.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index fc65305..6cf727b 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -16,9 +16,10 @@ #define HexagonREGISTERINFO_H #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/MC/MachineLocation.h" + #define GET_REGINFO_HEADER #include "HexagonGenRegisterInfo.inc" -#include "llvm/MC/MachineLocation.h" // // We try not to hard code the reserved registers in our code, diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 319eab2..b9e6894 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ 
b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -50,7 +50,7 @@ extern "C" void LLVMInitializeHexagonTarget() { /// HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - TargetOptions Options, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 70bea56..0336965 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -14,13 +14,13 @@ #ifndef HexagonTARGETMACHINE_H #define HexagonTARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "HexagonISelLowering.h" #include "HexagonSelectionDAGInfo.h" #include "HexagonFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" namespace llvm { @@ -37,8 +37,9 @@ class HexagonTargetMachine : public LLVMTargetMachine { public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, - StringRef FS, TargetOptions Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index d3ce5a6..32cc709 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "HexagonTargetObjectFile.h" +#include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Target/TargetData.h" @@ -18,9 +21,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/Support/ELF.h" #include "llvm/Support/CommandLine.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetObjectFile.h" -#include "HexagonTargetMachine.h" using namespace llvm; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 74abc56..3cfa4fd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- HexagonMCTargetDesc.cpp - Cell Hexagon Target Descriptions --------===// +//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file provides Cell Hexagon specific target descriptions. +// This file provides Hexagon specific target descriptions. 
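HexagonTargetMachine now takes TargetOptions by const reference instead of by value, so constructing a target machine no longer copies the whole options struct at the call boundary. The same change in miniature; the types here are stand-ins, not LLVM's:

    #include <string>
    #include <cstdio>

    struct Options { bool EmitJumpTables; std::string ABIName; };

    class Machine {
      Options Opts;                                    // one deliberate copy, kept as a member
    public:
      explicit Machine(const Options &O) : Opts(O) {}  // callers no longer pay for an extra copy
      bool jumpTables() const { return Opts.EmitJumpTables; }
    };

    int main() {
      Options O = { true, "o32" };
      Machine M(O);
      std::printf("%d\n", M.jumpTables());
    }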
// //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 364841f..b18d23a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -1,4 +1,4 @@ -//===-- SPUMCTargetDesc.h - Hexagon Target Descriptions ---------*- C++ -*-===// +//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SPUMCTARGETDESC_H -#define SPUMCTARGETDESC_H +#ifndef HEXAGONMCTARGETDESC_H +#define HEXAGONMCTARGETDESC_H namespace llvm { class MCSubtargetInfo; diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt index 1114d99..73c7e01 100644 --- a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile index 67be2bc..885be2d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/Makefile +++ b/lib/Target/Hexagon/MCTargetDesc/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CellSPU/TargetDesc/Makefile --------------*- Makefile -*-===## +##===- lib/Target/Hexagon/TargetDesc/Makefile --------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index adedf93..6b958c8 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -34,9 +34,9 @@ extern const MCInstrDesc MBlazeInsts[]; using namespace llvm; -const unsigned UNSUPPORTED = -1; +const uint16_t UNSUPPORTED = -1; -static const unsigned mblazeBinary2Opcode[] = { +static const uint16_t mblazeBinary2Opcode[] = { MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03 MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07 MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h index 8be15bf..01e6578 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.h +++ b/lib/Target/MBlaze/MBlazeFrameLowering.h @@ -15,11 +15,10 @@ #define MBLAZE_FRAMEINFO_H #include "MBlaze.h" -#include "MBlazeSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MBlazeSubtarget; +class MBlazeSubtarget; class MBlazeFrameLowering : public TargetFrameLowering { protected: diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 23c8e13..9ef6bb6 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -657,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { MBlaze::R5, MBlaze::R6, MBlaze::R7, MBlaze::R8, MBlaze::R9, MBlaze::R10 }; diff --git 
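The MBlaze disassembler table above also becomes uint16_t, and its UNSUPPORTED marker -1 simply wraps to 0xFFFF, a value no generated opcode number reaches. A toy version of the lookup; the opcode numbers are invented:

    #include <cstdint>
    #include <cstdio>

    static const uint16_t UNSUPPORTED = -1;   // wraps to 0xFFFF
    static const uint16_t Binary2Opcode[4] = { 7, 12, UNSUPPORTED, 42 };

    int main() {
      unsigned field = 2;                     // bit-field decoded from the instruction word
      uint16_t opc = Binary2Opcode[field];
      if (opc == UNSUPPORTED)
        std::printf("unsupported encoding\n");
      else
        std::printf("opcode %u\n", (unsigned)opc);
    }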
a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 168694b..6a79fc1 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -15,11 +15,11 @@ #ifndef MBlazeISELLOWERING_H #define MBlazeISELLOWERING_H +#include "MBlaze.h" +#include "MBlazeSubtarget.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" -#include "MBlaze.h" -#include "MBlazeSubtarget.h" namespace llvm { namespace MBlazeCC { diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index a309d2b..5252147 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -15,9 +15,9 @@ #define MBLAZEINSTRUCTIONINFO_H #include "MBlaze.h" +#include "MBlazeRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "MBlazeRegisterInfo.h" #define GET_INSTRINFO_HEADER #include "MBlazeGenInstrInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h index bb77ed4..7b97744 100644 --- a/lib/Target/MBlaze/MBlazeMCInstLower.h +++ b/lib/Target/MBlaze/MBlazeMCInstLower.h @@ -14,7 +14,6 @@ namespace llvm { class AsmPrinter; - class MCAsmInfo; class MCContext; class MCInst; class MCOperand; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index 6801a1a..46f5207 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "mblaze-frame-info" +#include "MBlazeRegisterInfo.h" #include "MBlaze.h" #include "MBlazeSubtarget.h" -#include "MBlazeRegisterInfo.h" #include "MBlazeMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Type.h" diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 5c07424..dd7de9b 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MBlaze.h" #include "MBlazeTargetMachine.h" +#include "MBlaze.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index fd5de34..c03ba47 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "MSP430InstrInfo.h" +#include "MSP430.h" #include "MSP430MachineFunctionInfo.h" #include "MSP430TargetMachine.h" #include "llvm/Function.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index fe2a75c..04f339b 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -14,8 +14,8 @@ #ifndef LLVM_TARGET_MSP430INSTRINFO_H #define LLVM_TARGET_MSP430INSTRINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "MSP430RegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "MSP430GenInstrInfo.inc" diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h index 297efd2..24151e2 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.h +++ b/lib/Target/MSP430/MSP430MCInstLower.h @@ -14,7 +14,6 @@ namespace llvm { class AsmPrinter; - 
class MCAsmInfo; class MCContext; class MCInst; class MCOperand; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index f9ddfb3..51ec71a 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -13,9 +13,9 @@ #define DEBUG_TYPE "msp430-reg-info" +#include "MSP430RegisterInfo.h" #include "MSP430.h" #include "MSP430MachineFunctionInfo.h" -#include "MSP430RegisterInfo.h" #include "MSP430TargetMachine.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index e7bebbd..4d8792e 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -15,12 +15,11 @@ #define LLVM_TARGET_MSP430_SUBTARGET_H #include "llvm/Target/TargetSubtargetInfo.h" +#include <string> #define GET_SUBTARGETINFO_HEADER #include "MSP430GenSubtargetInfo.inc" -#include <string> - namespace llvm { class StringRef; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index af62e48..9f2eda1 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MSP430.h" #include "MSP430TargetMachine.h" +#include "MSP430.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index d69570b..9d5a2f1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -92,25 +92,42 @@ public: if (!Value) return; // Doesn't change encoding. + // Where do we start in the object unsigned Offset = Fixup.getOffset(); - // FIXME: The below code will not work across endian models - // How many bytes/bits are we fixing up? - unsigned NumBytes = ((getFixupKindInfo(Kind).TargetSize-1)/8)+1; - uint64_t Mask = ((uint64_t)1 << getFixupKindInfo(Kind).TargetSize) - 1; + // Number of bytes we need to fixup + unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8; + // Used to point to big endian bytes + unsigned FullSize; + + switch ((unsigned)Kind) { + case Mips::fixup_Mips_16: + FullSize = 2; + break; + case Mips::fixup_Mips_64: + FullSize = 8; + break; + default: + FullSize = 4; + break; + } // Grab current value, if any, from bits. uint64_t CurVal = 0; - for (unsigned i = 0; i != NumBytes; ++i) - CurVal |= ((uint8_t)Data[Offset + i]) << (i * 8); + for (unsigned i = 0; i != NumBytes; ++i) { + unsigned Idx = IsLittle ? i : (FullSize - 1 - i); + CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8); + } + + uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); - // Write out the bytes back to the code/data bits. - // First the unaffected bits and then the fixup. + // Write out the fixed up bytes back to the code/data bits. for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] = uint8_t((CurVal >> (i * 8)) & 0xff); + unsigned Idx = IsLittle ? 
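The rewritten Mips applyFixup is the substantive change in this stretch: instead of assuming little-endian byte order, it reads and writes the affected bytes through an index that counts from the far end of the instruction word on big-endian targets, and derives the mask from the fixup's bit width. A standalone model of that loop, not the MCAsmBackend interface itself:

    #include <cstdint>
    #include <cstdio>

    static void applyFixup(uint8_t *Data, unsigned Offset, uint64_t Value,
                           unsigned TargetSizeBits, unsigned FullSize, bool IsLittle) {
      unsigned NumBytes = (TargetSizeBits + 7) / 8;
      // Read the current bytes, least-significant first; on big-endian targets
      // that means starting from the last byte of the instruction word.
      uint64_t CurVal = 0;
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
        CurVal |= (uint64_t)Data[Offset + Idx] << (i * 8);
      }
      // Add the fixup value under a mask sized to the fixup, then write back.
      uint64_t Mask = ~(uint64_t)0 >> (64 - TargetSizeBits);
      CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask);
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
        Data[Offset + Idx] = (uint8_t)((CurVal >> (i * 8)) & 0xff);
      }
    }

    int main() {
      uint8_t Word[4] = { 0x3c, 0x04, 0x00, 0x00 };      // big-endian "lui $a0, 0"
      applyFixup(Word, 0, 0x1234, 16, 4, /*IsLittle=*/false);
      std::printf("%02x %02x %02x %02x\n", Word[0], Word[1], Word[2], Word[3]);
    }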
i : (FullSize - 1 - i); + Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff); } -} + } unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index b039678..9ebb6d2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -49,9 +49,9 @@ public: void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const { // Output the instruction encoding in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; + for (unsigned i = 0; i < Size; ++i) { + unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8; + EmitByte((Val >> Shift) & 0xff, OS); } } diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index bacecf2..bafadc8 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -21,8 +21,6 @@ namespace llvm { class MipsTargetMachine; class FunctionPass; - class MachineCodeEmitter; - class formatted_raw_ostream; FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp index 31b669a..dc8fbd0 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.cpp +++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp @@ -26,28 +26,28 @@ void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) { Iter->push_back(I); } -void MipsAnalyzeImmediate::GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - GetInstSeqLs((Imm + 0x8000) & ~0xffff, RemSize, SeqLs); - AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffff)); + GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL)); } -void MipsAnalyzeImmediate::GetInstSeqLsORi(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - GetInstSeqLs(Imm & ~0xffff, RemSize, SeqLs); - AddInstr(SeqLs, Inst(ORi, Imm & 0xffff)); + GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs); + AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL)); } -void MipsAnalyzeImmediate::GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { unsigned Shamt = CountTrailingZeros_64(Imm); GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs); AddInstr(SeqLs, Inst(SLL, Shamt)); } -void MipsAnalyzeImmediate::GetInstSeqLs(int64_t Imm, unsigned RemSize, +void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs) { - int64_t MaskedImm = Imm & (((uint64_t)-1) >> (64 - Size)); + uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size)); // Do nothing if Imm is 0. 
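The matching code-emitter change: EmitInstruction now picks the shift per byte from the emission position, so the same loop writes bytes least-significant-first on little-endian Mips and most-significant-first on big-endian Mips. A self-contained model:

    #include <cstdint>
    #include <cstdio>

    static void emitInstruction(uint64_t Val, unsigned Size, bool IsLittleEndian) {
      for (unsigned i = 0; i < Size; ++i) {
        unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
        std::printf("%02x ", (unsigned)((Val >> Shift) & 0xff));
      }
      std::printf("\n");
    }

    int main() {
      emitInstruction(0x24040001, 4, false);  // big endian:    24 04 00 01
      emitInstruction(0x24040001, 4, true);   // little endian: 01 00 04 24
    }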
if (!MaskedImm) @@ -122,7 +122,7 @@ void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) { } const MipsAnalyzeImmediate::InstSeq -&MipsAnalyzeImmediate::Analyze(int64_t Imm, unsigned Size, +&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu) { this->Size = Size; diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h index 24e6e5f..a094dda 100644 --- a/lib/Target/Mips/MipsAnalyzeImmediate.h +++ b/lib/Target/Mips/MipsAnalyzeImmediate.h @@ -25,7 +25,7 @@ namespace llvm { /// Analyze - Get an instrucion sequence to load immediate Imm. The last /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is /// true; - const InstSeq &Analyze(int64_t Imm, unsigned Size, bool LastInstrIsADDiu); + const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu); private: typedef SmallVector<InstSeq, 5> InstSeqLs; @@ -34,18 +34,18 @@ namespace llvm { /// GetInstSeqLsADDiu - Get instrucion sequences which end with an ADDiu to /// load immediate Imm - void GetInstSeqLsADDiu(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLsORi - Get instrucion sequences which end with an ORi to /// load immediate Imm - void GetInstSeqLsORi(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLsSLL - Get instrucion sequences which end with a SLL to /// load immediate Imm - void GetInstSeqLsSLL(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// GetInstSeqLs - Get instrucion sequences to load immediate Imm. - void GetInstSeqLs(int64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); + void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs); /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi. 
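MipsAnalyzeImmediate now works on uint64_t with explicit 64-bit masks, so the 16-bit splits are plain modular arithmetic and no step relies on sign extension of an int literal or on signed-overflow behaviour. The ADDiu split in isolation; the +0x8000 rounding is there because ADDiu sign-extends its 16-bit immediate:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Imm = 0x12348765ULL;
      uint64_t Hi  = (Imm + 0x8000ULL) & 0xffffffffffff0000ULL;  // loaded first (recursively)
      uint64_t Lo  = Imm & 0xffffULL;                            // folded into the final ADDiu
      // Hi plus the sign-extended Lo reconstructs Imm: 0x12350000 - 0x789b == 0x12348765.
      std::printf("hi=%016llx lo=%04llx\n",
                  (unsigned long long)Hi, (unsigned long long)Lo);
    }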
void ReplaceADDiuSLLWithLUi(InstSeq &Seq); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index aeabc0f..f2b842a 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,8 +13,8 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" -#include "Mips.h" #include "MipsAsmPrinter.h" +#include "Mips.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" @@ -34,8 +34,6 @@ #include "llvm/Instructions.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 8502db2..473da7e 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -22,9 +22,9 @@ namespace llvm { class MCStreamer; class MachineInstr; -class raw_ostream; class MachineBasicBlock; class Module; +class raw_ostream; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index e83c64e..ebfbb4a 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MipsAnalyzeImmediate.h" #include "MipsFrameLowering.h" +#include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MCTargetDesc/MipsBaseInfo.h" diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 782d203..536879e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -99,6 +99,8 @@ private: return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); } + void ProcessFunctionAfterISel(MachineFunction &MF); + bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); void InitGlobalBaseReg(MachineFunction &MF); virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -181,10 +183,57 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { } } +bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E; ++U) { + MachineOperand &MO = U.getOperand(); + MachineInstr *MI = MO.getParent(); + + // Do not replace if it is a phi's operand or is tied to def operand. 
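The new ReplaceUsesWithZeroReg above retargets every use of the destination of an "addiu $dst, $zero, 0" (or daddiu) to the ZERO register, skipping PHI operands and operands tied to a def. The use-rewriting idiom it relies on, distilled into a helper of my own naming:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    using namespace llvm;

    // Retarget every eligible use of FromReg to ToReg via the register's use list.
    static void rewriteUses(MachineRegisterInfo *MRI, unsigned FromReg, unsigned ToReg) {
      for (MachineRegisterInfo::use_iterator U = MRI->use_begin(FromReg),
           E = MRI->use_end(); U != E; ++U) {
        MachineOperand &MO = U.getOperand();
        MachineInstr *UserMI = MO.getParent();
        // PHI operands and tied operands must keep the original register.
        if (UserMI->isPHI() || UserMI->isRegTiedToDefOperand(U.getOperandNo()))
          continue;
        MO.setReg(ToReg);
      }
    }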
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo())) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { + InitGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + ReplaceUsesWithZeroReg(MRI, *I); +} + bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - InitGlobalBaseReg(MF); + ProcessFunctionAfterISel(MF); return Ret; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index dc894d9..ecde5b6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -18,13 +18,13 @@ #include "MipsTargetMachine.h" #include "MipsTargetObjectFile.h" #include "MipsSubtarget.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" -#include "InstPrinter/MipsInstPrinter.h" -#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -130,22 +130,32 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + + if (HasMips64) { + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); + setOperationAction(ISD::JumpTable, MVT::i64, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); + setOperationAction(ISD::SELECT, MVT::i64, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + } setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i32, Expand); @@ -185,8 +195,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); 
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); @@ -214,9 +222,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -246,11 +251,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); - setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); - setMinFunctionAlignment(2); + setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); computeRegisterProperties(); @@ -559,21 +564,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget* Subtarget) { +static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); - SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0)); + SDValue SetCC = N->getOperand(0); - if (Cond.getOpcode() != MipsISD::FPCmp) + if ((SetCC.getOpcode() != ISD::SETCC) || + !SetCC.getOperand(0).getValueType().isInteger()) return SDValue(); - SDValue True = DAG.getConstant(1, MVT::i32); - SDValue False = DAG.getConstant(0, MVT::i32); + SDValue False = N->getOperand(2); + EVT FalseTy = False.getValueType(); - return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); + if (!FalseTy.isInteger()) + return SDValue(); + + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False); + + if (!CN || CN->getZExtValue()) + return SDValue(); + + const DebugLoc DL = N->getDebugLoc(); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + SDValue True = N->getOperand(1); + + SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0), + SetCC.getOperand(1), ISD::getSetCCInverse(CC, true)); + + return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG, @@ -684,8 +705,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) case ISD::SDIVREM: case ISD::UDIVREM: return PerformDivRemCombine(N, DAG, DCI, Subtarget); - case ISD::SETCC: - return PerformSETCCCombine(N, DAG, DCI, Subtarget); + case ISD::SELECT: + return PerformSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: @@ -708,6 +729,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return 
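The DAG combine moves from SETCC to SELECT nodes: when a select's condition is an integer setcc and its false operand is the constant 0, the condition is inverted and the arms swapped, which puts the zero where the Mips conditional-move patterns want it. The underlying identity, select(c, t, 0) == select(!c, 0, t), checked in scalar form:

    #include <cstdio>

    static int selectForm1(bool c, int t) { return c  ? t : 0; }   // before the combine
    static int selectForm2(bool c, int t) { return !c ? 0 : t; }   // after the combine

    int main() {
      for (int c = 0; c <= 1; ++c)
        std::printf("%d %d\n", selectForm1(c != 0, 7), selectForm2(c != 0, 7));
    }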
LowerFCOPYSIGN(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); @@ -1475,6 +1497,18 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const Op.getDebugLoc()); } +SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = CreateFPCmp(DAG, Op); + + assert(Cond.getOpcode() == MipsISD::FPCmp && + "Floating point operand expected."); + + SDValue True = DAG.getConstant(1, MVT::i32); + SDValue False = DAG.getConstant(0, MVT::i32); + + return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc()); +} + SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here @@ -1841,13 +1875,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, static const unsigned IntRegsSize=4, FloatRegsSize=2; - static const unsigned IntRegs[] = { + static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; - static const unsigned F32Regs[] = { + static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 }; - static const unsigned F64Regs[] = { + static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 }; @@ -1926,10 +1960,10 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, return false; // CC must always match } -static const unsigned Mips64IntRegs[8] = +static const uint16_t Mips64IntRegs[8] = {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; -static const unsigned Mips64DPRegs[8] = +static const uint16_t Mips64DPRegs[8] = {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; @@ -1996,7 +2030,7 @@ AnalyzeMips64CallOperands(CCState &CCInfo, static const unsigned O32IntRegsSize = 4; -static const unsigned O32IntRegs[] = { +static const uint16_t O32IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -2115,9 +2149,9 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, if (!IsRegLoc) LocMemOffset = VA.getLocMemOffset(); else { - const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, + const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); - const unsigned *RegEnd = Mips64IntRegs + 8; + const uint16_t *RegEnd = Mips64IntRegs + 8; // Copy double words to registers. for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { @@ -2540,7 +2574,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, EVT PtrTy) { - const unsigned *Reg = Mips64IntRegs + 8; + const uint16_t *Reg = Mips64IntRegs + 8; int FOOffset; // Frame object offset from virtual frame pointer. if (IsRegLoc) { @@ -2709,7 +2743,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { unsigned NumOfRegs = IsO32 ? 4 : 8; - const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; + const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. 
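The new LowerSETCC handles the floating-point comparisons that the old SETCC combine used to rewrite: it builds the FPCmp node and materializes the boolean with a conditional move between the constants 1 and 0. A scalar stand-in for the value the lowering produces; the real code works on SDNodes, not doubles:

    #include <cstdio>

    static int fpSetcc(double a, double b) {
      bool flag = (a < b);   // stands in for MipsISD::FPCmp setting the FP condition flag
      return flag ? 1 : 0;   // stands in for the CMovFP selecting between constants 1 and 0
    }

    int main() { std::printf("%d %d\n", fpSetcc(1.0, 2.0), fpSetcc(2.0, 1.0)); }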
const TargetRegisterClass *RC diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 621bbec..66f45cd 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -15,10 +15,10 @@ #ifndef MipsISELLOWERING_H #define MipsISELLOWERING_H -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" #include "Mips.h" #include "MipsSubtarget.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace MipsISD { @@ -128,6 +128,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 10caf30..4be727d 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -15,9 +15,9 @@ #define MIPSINSTRUCTIONINFO_H #include "Mips.h" +#include "MipsRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "MipsRegisterInfo.h" #define GET_INSTRINFO_HEADER #include "MipsGenInstrInfo.inc" diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index be65298..0d51298 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -12,9 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" -#include "MipsMCInstLower.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index cbd5264..20bb338 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -14,11 +14,9 @@ #include "llvm/Support/Compiler.h" namespace llvm { - class MCAsmInfo; class MCContext; class MCInst; class MCOperand; - class MCSymbol; class MachineInstr; class MachineFunction; class Mangler; @@ -38,7 +36,7 @@ public: void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, - SmallVector<MCInst, 4>& MCInsts); + SmallVector<MCInst, 4>& MCInsts); void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 57ff069..abb5404 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,10 +14,10 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H -#include <utility> #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include <utility> namespace llvm { diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index e0ecba2..5cfda34 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -13,10 +13,10 
@@ #define DEBUG_TYPE "mips-reg-info" +#include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" #include "MipsSubtarget.h" -#include "MipsRegisterInfo.h" #include "MipsMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Type.h" @@ -83,12 +83,12 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - static const unsigned ReservedCPURegs[] = { + static const uint16_t ReservedCPURegs[] = { Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, Mips::SP, Mips::FP, Mips::RA }; - static const unsigned ReservedCPU64Regs[] = { + static const uint16_t ReservedCPU64Regs[] = { Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 }; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 8806aaf..ad02231 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "Mips.h" #include "MipsTargetMachine.h" +#include "Mips.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 19ae142..80c00e8 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -14,15 +14,15 @@ #ifndef MIPSTARGETMACHINE_H #define MIPSTARGETMACHINE_H -#include "MipsSubtarget.h" +#include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsISelLowering.h" -#include "MipsFrameLowering.h" +#include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" +#include "MipsSubtarget.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" -#include "MipsJITInfo.h" namespace llvm { class formatted_raw_ostream; diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h index 77a298d..a3e0f32 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -17,9 +17,9 @@ #ifndef PTXBASEINFO_H #define PTXBASEINFO_H +#include "PTXMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "PTXMCTargetDesc.h" namespace llvm { namespace PTXStateSpace { diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index 7d46cce..ffb92cb 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -1,4 +1,3 @@ -//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 58ac5f2..0b6ac7b 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -14,8 +14,8 @@ #define DEBUG_TYPE "ptx-asm-printer" -#include "PTX.h" #include "PTXAsmPrinter.h" +#include "PTX.h" #include "PTXMachineFunctionInfo.h" #include "PTXParamManager.h" #include "PTXRegisterInfo.h" diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e5d4edc..db1c953 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXISelLowering.h" +#include "PTX.h" #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" 
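The Mips reserved-register tables also become uint16_t; getReservedRegs folds them into a BitVector sized to the register file. A sketch of that idiom; the register numbers below are placeholders, not Mips encodings:

    #include "llvm/ADT/BitVector.h"
    #include <stdint.h>

    static llvm::BitVector reservedRegs(unsigned NumRegs) {
      static const uint16_t ReservedCPURegs[] = { 0, 1, 26, 27, 29, 30, 31 };
      llvm::BitVector Reserved(NumRegs);
      for (unsigned i = 0;
           i != sizeof(ReservedCPURegs) / sizeof(ReservedCPURegs[0]); ++i)
        Reserved.set(ReservedCPURegs[i]);
      return Reserved;
    }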
#include "PTXSubtarget.h" diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index fd20982..33220f4 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -18,8 +18,6 @@ #include "llvm/Target/TargetLowering.h" namespace llvm { -class PTXSubtarget; -class PTXTargetMachine; namespace PTXISD { enum NodeType { diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 9d6cbf1..443cd54 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "ptx-instrinfo" -#include "PTX.h" #include "PTXInstrInfo.h" +#include "PTX.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp index 74538e6..cc1cc71 100644 --- a/lib/Target/PTX/PTXParamManager.cpp +++ b/lib/Target/PTX/PTXParamManager.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXParamManager.h" +#include "PTX.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h index 32342f7..92e7728 100644 --- a/lib/Target/PTX/PTXParamManager.h +++ b/lib/Target/PTX/PTXParamManager.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include <string> namespace llvm { diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index 3f087cd..b6ffd38 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXRegisterInfo.h" +#include "PTX.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 9305377..40835d0 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PTX.h" #include "PTXTargetMachine.h" +#include "PTX.h" #include "llvm/PassManager.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" @@ -26,6 +26,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -37,8 +38,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 02dad45..9c6eefe 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" 
#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index 5dc1863..24a7178 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -25,14 +25,11 @@ namespace llvm { class PPCTargetMachine; class FunctionPass; - class formatted_raw_ostream; class JITCodeEmitter; - class Target; class MachineInstr; class AsmPrinter; class MCInst; - class TargetMachine; - + FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 591ae02..4abb469 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "PPC.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" @@ -53,7 +54,6 @@ #include "llvm/Support/ELF.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallString.h" -#include "InstPrinter/PPCInstPrinter.h" using namespace llvm; namespace { diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 8efc9c1..9883c2e 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[ CCCustom<"CC_PPC_SVR4_Custom_Dummy"> ]>; +def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE, + R21, R22, R23, R24, R25, R26, R27, R28, + R29, R30, R31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; + +def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE, + X21, X22, X23, X24, X25, X26, X27, X28, + X29, X30, X31, F14, F15, F16, F17, F18, + F19, F20, F21, F22, F23, F24, F25, F26, + F27, F28, F29, F30, F31, CR2, CR3, CR4, + V20, V21, V22, V23, V24, V25, V26, V27, + V28, V29, V30, V31)>; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 6d612f7..b77a80b 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -38,7 +38,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. 
/// -static const unsigned short VRRegNo[] = { +static const uint16_t VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 95d0d64..d80a385 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -14,10 +14,10 @@ #ifndef PPCHAZRECS_H #define PPCHAZRECS_H +#include "PPCInstrInfo.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "PPCInstrInfo.h" namespace llvm { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bfed7ba..85b5bc1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16,6 +16,11 @@ #include "PPCPerfectShuffle.h" #include "PPCTargetMachine.h" #include "MCTargetDesc/PPCPredicates.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,16 +29,11 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DerivedTypes.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, @@ -1547,7 +1547,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -1574,7 +1574,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1598,8 +1598,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const unsigned *GetFPR() { - static const unsigned FPR[] = { +static const uint16_t *GetFPR() { + static const uint16_t FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -1780,13 +1780,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
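Aside: this hunk and several later ones (Mips::getReservedRegs, the PPC/Sparc/X86 argument-register tables, PPCFrameLowering's VRRegNo) narrow static register-number arrays from unsigned to uint16_t. A minimal sketch of the idea, with made-up register numbers and assuming the usual 32-bit unsigned; it is not code from the patch:

#include <cstdint>

// Target register enum values are small, so a 16-bit element type holds them
// exactly; on a typical host the narrow table occupies half the static data.
static const unsigned WideArgRegs[]   = { 3, 4, 5, 6, 7, 8, 9, 10 };
static const uint16_t NarrowArgRegs[] = { 3, 4, 5, 6, 7, 8, 9, 10 };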
if (isVarArg) { - static const unsigned GPArgRegs[] = { + static const uint16_t GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const unsigned FPArgRegs[] = { + static const uint16_t FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -1879,18 +1879,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -1901,7 +1901,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -2769,6 +2769,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, (CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -3141,17 +3147,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const unsigned GPR_32[] = { // 32-bit registers. + static const uint16_t GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const unsigned GPR_64[] = { // 64-bit registers. + static const uint16_t GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(); + static const uint16_t *FPR = GetFPR(); - static const unsigned VR[] = { + static const uint16_t VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -3159,7 +3165,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; + const uint16_t *GPR = isPPC64 ? 
GPR_64 : GPR_32; SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<TailCallArgumentInfo, 8> TailCallArguments; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3534e9c..2e046c4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -15,10 +15,10 @@ #ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "PPC.h" #include "PPCSubtarget.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { namespace PPCISD { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 02bffed..78f3596 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -64,13 +64,7 @@ let Defs = [LR8] in PPC970_Unit_BRU; // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_Darwin : IForm<18, 0, 1, @@ -90,13 +84,7 @@ let isCall = 1, PPC970_Unit = 7, // ELF 64 ABI Calls = Darwin ABI Calls // Used to define BL8_ELF and BLA8_ELF -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the PPC64 non-callee saved registers. - Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR8,CTR8, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e5f171d..7d49aa1 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -15,8 +15,8 @@ #define POWERPC_INSTRUCTIONINFO_H #include "PPC.h" -#include "llvm/Target/TargetInstrInfo.h" #include "PPCRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "PPCGenInstrInfo.inc" diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e234012..939b71a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -438,13 +438,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } // Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... - Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_Darwin : IForm<18, 0, 1, @@ -463,13 +457,7 @@ let isCall = 1, PPC970_Unit = 7, } // SVR4 ABI Calls. -let isCall = 1, PPC970_Unit = 7, - // All calls clobber the non-callee saved registers... 
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12, - F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13, - V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19, - LR,CTR, - CR0,CR1,CR5,CR6,CR7,CARRY] in { +let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { def BL_SVR4 : IForm<18, 0, 1, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 306cc1f..2976f01 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -13,10 +13,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "reginfo" +#include "PPCRegisterInfo.h" #include "PPC.h" #include "PPCInstrBuilder.h" #include "PPCMachineFunctionInfo.h" -#include "PPCRegisterInfo.h" #include "PPCFrameLowering.h" #include "PPCSubtarget.h" #include "llvm/CallingConv.h" @@ -100,104 +100,20 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const { const uint16_t* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. - static const uint16_t Darwin32_CalleeSavedRegs[] = { - PPC::R13, PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::LR, 0 - }; - - // 32-bit SVR4 calling convention. - static const uint16_t SVR4_CalleeSavedRegs[] = { - PPC::R14, PPC::R15, - PPC::R16, PPC::R17, PPC::R18, PPC::R19, - PPC::R20, PPC::R21, PPC::R22, PPC::R23, - PPC::R24, PPC::R25, PPC::R26, PPC::R27, - PPC::R28, PPC::R29, PPC::R30, PPC::R31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, - - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - 0 - }; - // 64-bit Darwin calling convention. - static const uint16_t Darwin64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, - - PPC::LR8, 0 - }; - - // 64-bit SVR4 calling convention. 
- static const uint16_t SVR4_64_CalleeSavedRegs[] = { - PPC::X14, PPC::X15, - PPC::X16, PPC::X17, PPC::X18, PPC::X19, - PPC::X20, PPC::X21, PPC::X22, PPC::X23, - PPC::X24, PPC::X25, PPC::X26, PPC::X27, - PPC::X28, PPC::X29, PPC::X30, PPC::X31, - - PPC::F14, PPC::F15, PPC::F16, PPC::F17, - PPC::F18, PPC::F19, PPC::F20, PPC::F21, - PPC::F22, PPC::F23, PPC::F24, PPC::F25, - PPC::F26, PPC::F27, PPC::F28, PPC::F29, - PPC::F30, PPC::F31, - - PPC::CR2, PPC::CR3, PPC::CR4, - - PPC::VRSAVE, + if (Subtarget.isDarwinABI()) + return Subtarget.isPPC64() ? CSR_Darwin64_SaveList : + CSR_Darwin32_SaveList; - PPC::V20, PPC::V21, PPC::V22, PPC::V23, - PPC::V24, PPC::V25, PPC::V26, PPC::V27, - PPC::V28, PPC::V29, PPC::V30, PPC::V31, + return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList; +} - 0 - }; - +const unsigned* +PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : - Darwin32_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_Darwin64_RegMask : + CSR_Darwin32_RegMask; - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; + return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 6ce90bc..b1e6a72 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -42,6 +42,7 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + const unsigned *getCallPreservedMask(CallingConv::ID CC) const; BitVector getReservedRegs(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index da20274..ba9c779 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "PPC.h" #include "PPCTargetMachine.h" +#include "PPC.h" #include "llvm/PassManager.h" #include "llvm/MC/MCStreamer.h" #include "llvm/CodeGen/Passes.h" diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 6dd11c9..7da2b0c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -24,8 +24,6 @@ #include "llvm/Target/TargetData.h" namespace llvm { -class PassManager; -class GlobalValue; /// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets. /// diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp index 1423b1e..9a729bd 100644 --- a/lib/Target/Sparc/FPMover.cpp +++ b/lib/Target/Sparc/FPMover.cpp @@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) { /// registers that correspond to it. 
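Aside: the PowerPC hunks above replace both the hand-written callee-saved arrays in PPCRegisterInfo.cpp and the long per-instruction Defs clobber lists in the .td files with the CalleeSavedRegs sets added to PPCCallingConv.td; the generated CSR_*_SaveList arrays and CSR_*_RegMask masks are returned from getCalleeSavedRegs/getCallPreservedMask, and FinishCall attaches the mask to the call node via DAG.getRegisterMask. A rough sketch of how such a mask operand could be interpreted, assuming the one-bit-per-register packing where a set bit means the register is preserved across the call (isClobberedByCall is an illustrative name, not an LLVM API):

#include <cstdint>

// Sketch only: test a physical register against a call-preserved mask of the
// kind returned by getCallPreservedMask() in this patch.
static bool isClobberedByCall(const uint32_t *Mask, unsigned PhysReg) {
  // One bit per physical register, packed into 32-bit words; assumed
  // convention: set bit == preserved by the callee.
  bool Preserved = (Mask[PhysReg / 32] & (1u << (PhysReg % 32))) != 0;
  return !Preserved;
}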
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg, unsigned &OddReg) { - static const unsigned EvenHalvesOfPairs[] = { + static const uint16_t EvenHalvesOfPairs[] = { SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14, SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30 }; - static const unsigned OddHalvesOfPairs[] = { + static const uint16_t OddHalvesOfPairs[] = { SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15, SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31 }; - static const unsigned DoubleRegsInOrder[] = { + static const uint16_t DoubleRegsInOrder[] = { SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8, SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15 }; - for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i) + for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i) if (DoubleRegsInOrder[i] == DoubleReg) { EvenReg = EvenHalvesOfPairs[i]; OddReg = OddHalvesOfPairs[i]; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index a6b63fb..ee12633 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -50,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const unsigned RegList[] = { + static const uint16_t RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; //Try to get first reg @@ -301,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6); - const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; + const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; unsigned ArgOffset = CCInfo.getNextStackOffset(); if (NumAllocated == 6) ArgOffset += StackOffset; diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 4a7c479..f483c96 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -15,8 +15,8 @@ #ifndef SPARC_ISELLOWERING_H #define SPARC_ISELLOWERING_H -#include "llvm/Target/TargetLowering.h" #include "Sparc.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { namespace SPISD { diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index 4932531..204f698 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -14,8 +14,8 @@ #ifndef SPARCINSTRUCTIONINFO_H #define SPARCINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "SparcRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "SparcGenInstrInfo.inc" diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index c392fcc..6357468 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "SparcRegisterInfo.h" +#include "Sparc.h" #include "SparcSubtarget.h" +#include "llvm/Type.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include 
"llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 80a3be6..6f31356 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include "Sparc.h" #include "SparcTargetMachine.h" +#include "Sparc.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d91830f..9e88472 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -67,11 +67,11 @@ private: MCStreamer &Out); /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) - /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. + /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); - /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode - /// or %es:(%edi) in 32bit mode. + /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi) + /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. bool isDstOp(X86Operand &Op); bool is64BitMode() const { @@ -468,7 +468,8 @@ bool X86AsmParser::isSrcOp(X86Operand &Op) { bool X86AsmParser::isDstOp(X86Operand &Op) { unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI; - return Op.isMem() && Op.Mem.SegReg == X86::ES && + return Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; @@ -838,6 +839,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // If we reached here, then we just ate the ( of the memory operand. Process // the rest of the memory operand. unsigned BaseReg = 0, IndexReg = 0, Scale = 1; + SMLoc IndexLoc; if (getLexer().is(AsmToken::Percent)) { SMLoc StartLoc, EndLoc; @@ -851,6 +853,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. + IndexLoc = Parser.getTok().getLoc(); // Following the comma we should have either an index register, or a scale // value. We don't support the later form, but we want to parse it @@ -876,8 +879,10 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc Loc = Parser.getTok().getLoc(); int64_t ScaleVal; - if (getParser().ParseAbsoluteExpression(ScaleVal)) + if (getParser().ParseAbsoluteExpression(ScaleVal)){ + Error(Loc, "expected scale expression"); return 0; + } // Validate the scale amount. if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ @@ -910,6 +915,23 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { SMLoc MemEnd = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. + // If we have both a base register and an index register make sure they are + // both 64-bit or 32-bit registers. 
+ if (BaseReg != 0 && IndexReg != 0) { + if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) && + !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) && + IndexReg != X86::RIZ) { + Error(IndexLoc, "index register is 32-bit, but base register is 64-bit"); + return 0; + } + if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) && + !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) && + IndexReg != X86::EIZ){ + Error(IndexLoc, "index register is 64-bit, but base register is 32-bit"); + return 0; + } + } + return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, MemStart, MemEnd); } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b0e66f0..fbd81d2 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -312,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) { if (consumeByte(insn, &byte)) return -1; + + /* + * If the first byte is a LOCK prefix break and let it be disassembled + * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. + * FIXME there is currently no way to get the disassembler to print the + * lock prefix if it is not the first byte. + */ + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; switch (byte) { case 0xf0: /* LOCK */ diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 30a847f..f532019 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -29,7 +29,7 @@ using namespace llvm; void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, const char *(*getRegName)(unsigned)) { // If this is a shuffle operation, the switch should fill in this state. - SmallVector<unsigned, 8> ShuffleMask; + SmallVector<int, 8> ShuffleMask; const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; switch (MI->getOpcode()) { @@ -500,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, if (Src1Name == Src2Name) { for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) { if ((int)ShuffleMask[i] >= 0 && // Not sentinel. - ShuffleMask[i] >= e) // From second mask. + ShuffleMask[i] >= (int)e) // From second mask. ShuffleMask[i] -= e; } } @@ -518,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // Otherwise, it must come from src1 or src2. Print the span of elements // that comes from this src. - bool isSrc1 = ShuffleMask[i] < ShuffleMask.size(); + bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size(); const char *SrcName = isSrc1 ? Src1Name : Src2Name; OS << (SrcName ? 
SrcName : "mem") << '['; bool IsFirst = true; while (i != e && (int)ShuffleMask[i] >= 0 && - (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) { + (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) { if (!IsFirst) OS << ','; else diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 9ccbf1c..3f770f7 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -7,10 +7,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmBackend.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 37727b6..80990e5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -46,6 +46,11 @@ public: return (STI.getFeatureBits() & X86::Mode64Bit) != 0; } + bool is32BitMode() const { + // FIXME: Can tablegen auto-generate this? + return (STI.getFeatureBits() & X86::Mode64Bit) == 0; + } + static unsigned GetX86RegNum(const MCOperand &MO) { return X86_MC::getX86RegNum(MO.getReg()); } @@ -154,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) { return MCFixup::getKindForSize(Size, isPCRel); } -/// Is32BitMemOperand - Return true if the specified instruction with a memory -/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit -/// memory operand. Op specifies the operand # of the memoperand. +/// Is32BitMemOperand - Return true if the specified instruction has +/// a 32-bit memory operand. Op specifies the operand # of the memoperand. static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); @@ -169,6 +173,36 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) { return false; } +/// Is64BitMemOperand - Return true if the specified instruction has +/// a 64-bit memory operand. Op specifies the operand # of the memoperand. +#ifndef NDEBUG +static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} +#endif + +/// Is16BitMemOperand - Return true if the specified instruction has +/// a 16-bit memory operand. Op specifies the operand # of the memoperand. 
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) { + const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg); + const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg); + + if ((BaseReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) || + (IndexReg.getReg() != 0 && + X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg()))) + return true; + return false; +} + /// StartsWithGlobalOffsetTable - Check if this expression starts with /// _GLOBAL_OFFSET_TABLE_ and if it is of the form /// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF @@ -817,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0xF3, CurByte, OS); // Emit the address size opcode prefix as needed. - if ((TSFlags & X86II::AdSize) || - (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand))) + bool need_address_override; + if (TSFlags & X86II::AdSize) { + need_address_override = true; + } else if (MemOperand == -1) { + need_address_override = false; + } else if (is64BitMode()) { + assert(!Is16BitMemOperand(MI, MemOperand)); + need_address_override = Is32BitMemOperand(MI, MemOperand); + } else if (is32BitMode()) { + assert(!Is64BitMemOperand(MI, MemOperand)); + need_address_override = Is16BitMemOperand(MI, MemOperand); + } else { + need_address_override = false; + } + + if (need_address_override) EmitByte(0x67, CurByte, OS); // Emit the operand size opcode prefix as needed. diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index a581993..624e56f 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -922,3 +922,22 @@ _test2: ## @test2 The insertps's of $0 are pointless complex copies. //===---------------------------------------------------------------------===// + +[UNSAFE FP] + +void foo(double, double, double); +void norm(double x, double y, double z) { + double scale = __builtin_sqrt(x*x + y*y + z*z); + foo(x/scale, y/scale, z/scale); +} + +We currently generate an sqrtsd and 3 divsd instructions. This is bad, fp div is +slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first +and emit 3 mulsd in place of the divs. This can be done as a target-independent +transform. + +If we're dealing with floats instead of doubles we could even replace the sqrtss +and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the +cost of reduced accuracy. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index f4b85ae..32c722a 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -20,7 +20,7 @@ namespace llvm { -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { // Defaults the copying the dest value. 
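Aside: the README-SSE.txt note added above describes the missing fast-math rewrite only in prose. A hand-applied version of the transform it asks for, kept as a sketch (norm_fast is an illustrative name; the compiler does not emit this today):

void foo(double, double, double);

/* One divide computes the reciprocal; the three divisions become
 * multiplications.  Legal only under -ffast-math, since it perturbs
 * rounding; for floats the divide+sqrt pair could further become rsqrtss. */
void norm_fast(double x, double y, double z) {
  double scale = __builtin_sqrt(x*x + y*y + z*z);
  double inv   = 1.0 / scale;
  foo(x*inv, y*inv, z*inv);
}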
ShuffleMask.push_back(0); ShuffleMask.push_back(1); @@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { } // <3,1> or <6,7,2,3> -void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = NElts/2; i != NElts; ++i) ShuffleMask.push_back(NElts+i); @@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts, } // <0,2> or <0,1,4,5> -void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = 0; i != NElts/2; ++i) ShuffleMask.push_back(i); @@ -66,8 +64,7 @@ void DecodeMOVLHPSMask(unsigned NElts, /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodePSHUFMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; @@ -83,8 +80,7 @@ void DecodePSHUFMask(EVT VT, unsigned Imm, } } -void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back(0); ShuffleMask.push_back(1); ShuffleMask.push_back(2); @@ -95,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm, } } -void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = 0; i != 4; ++i) { ShuffleMask.push_back((Imm & 3)); Imm >>= 2; @@ -110,8 +105,7 @@ void DecodePSHUFLWMask(unsigned Imm, /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = VT.getSizeInBits() / 128; @@ -136,7 +130,7 @@ void DecodeSHUFPMask(EVT VT, unsigned Imm, /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -156,7 +150,7 @@ void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. 
AVX defines UNPCK* to operate @@ -174,7 +168,7 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { } void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask) { + SmallVectorImpl<int> &ShuffleMask) { unsigned HalfSize = VT.getVectorNumElements()/2; unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 877c9bd..5b8c6ef 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -24,47 +24,41 @@ namespace llvm { enum { - SM_SentinelZero = ~0U + SM_SentinelZero = -1 }; -void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); // <3,1> or <6,7,2,3> -void DecodeMOVHLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); // <0,2> or <0,1,4,5> -void DecodeMOVLHPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFHWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFLWMask(unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates /// the type of the vector allowing it to handle different datatypes and vector /// widths. -void DecodeSHUFPMask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd /// and punpckh*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. -void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask); /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd /// and punpckl*. VT indicates the type of the vector allowing it to handle /// different datatypes and vector widths. 
-void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask); void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, - SmallVectorImpl<unsigned> &ShuffleMask); + SmallVectorImpl<int> &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 81e9422..ecc7b59 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -24,8 +24,6 @@ namespace llvm { class FunctionPass; class JITCodeEmitter; -class MachineCodeEmitter; -class Target; class X86TargetMachine; /// createX86ISelDag - This pass converts a legalized DAG into a diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 268cbf4..f1cedf3 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -13,13 +13,13 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" -#include "InstPrinter/X86ATTInstPrinter.h" -#include "InstPrinter/X86IntelInstPrinter.h" #include "X86MCInstLower.h" #include "X86.h" #include "X86COFFMachineModuleInfo.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 1058df5..a6ed9ba 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -24,11 +24,7 @@ namespace llvm { -class MachineJumpTableInfo; -class MCContext; -class MCInst; class MCStreamer; -class MCSymbol; class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 63c08f1..0cec95a 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -14,9 +14,9 @@ #ifndef X86COFF_MACHINEMODULEINFO_H #define X86COFF_MACHINEMODULEINFO_H +#include "X86MachineFunctionInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/DenseSet.h" -#include "X86MachineFunctionInfo.h" namespace llvm { class X86MachineFunctionInfo; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index f90764e..3d63b7e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1779,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) { // Count the number of XMM registers allocated. 
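Aside: the X86ShuffleDecode and X86InstComments hunks above switch the decoded masks from SmallVectorImpl<unsigned> to SmallVectorImpl<int>, with SM_SentinelZero now -1, so sentinel entries are simply negative values, in line with the -1-for-undef convention shuffle masks use elsewhere. A small sketch of the resulting idiom, using std::vector and an invented counting helper rather than the real decode routines:

#include <cstddef>
#include <vector>

enum { SketchSentinelZero = -1 };   // mirrors the new -1 sentinel

// Count mask entries that name a real source element, skipping sentinels.
static unsigned countRealElements(const std::vector<int> &Mask) {
  unsigned N = 0;
  for (std::size_t i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] >= 0)        // sentinel/undef entries are negative
      ++N;
  return N;
}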
- static const unsigned XMMArgRegs[] = { + static const uint16_t XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 32de194..936df27 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -26,6 +26,7 @@ #define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" +#include "llvm/InlineAsm.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -37,7 +38,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -570,8 +570,8 @@ void FPS::finishBlockStack() { namespace { struct TableEntry { - unsigned from; - unsigned to; + uint16_t from; + uint16_t to; bool operator<(const TableEntry &TE) const { return from < TE.from; } friend bool operator<(const TableEntry &TE, unsigned V) { return TE.from < V; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index aa508b8..9405c2f 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -21,7 +21,6 @@ #include "X86TargetMachine.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" -#include "llvm/Support/CFG.h" #include "llvm/Type.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -32,6 +31,7 @@ #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1654,7 +1654,7 @@ enum AtomicSz { AtomicSzEnd }; -static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { +static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { { X86::LOCK_OR8mi, X86::LOCK_OR8mr, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cae9aad..88f3829 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1927,17 +1927,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; // FIXME: We should really autogenerate these arrays - static const unsigned GPR64ArgRegsWin64[] = { + static const uint16_t GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; - static const unsigned GPR64ArgRegs64Bit[] = { + static const uint16_t GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; - static const unsigned XMMArgRegs64Bit[] = { + static const uint16_t XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - const unsigned *GPR64ArgRegs; + const uint16_t *GPR64ArgRegs; unsigned NumXMMRegs = 0; if (IsWin64) { @@ -2326,7 +2326,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. 
- static const unsigned XMMArgRegs[] = { + static const uint16_t XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; @@ -2910,7 +2910,7 @@ static bool isTargetShuffle(unsigned Opcode) { } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, SelectionDAG &DAG) { + SDValue V1, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::MOVSHDUP: @@ -2921,7 +2921,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { + SDValue V1, unsigned TargetMask, + SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PSHUFD: @@ -2933,7 +2934,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, } static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) { + SDValue V1, SDValue V2, unsigned TargetMask, + SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); case X86ISD::PALIGN: @@ -3712,6 +3714,8 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { unsigned NumOps = VT.getVectorNumElements(); + if (VT.getSizeInBits() == 256) + return false; if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -4342,9 +4346,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]); } +/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the +/// target specific opcode. Returns true if the Mask could be calculated. +/// Sets IsUnary to true if only uses one source. +static bool getTargetShuffleMask(SDNode *N, EVT VT, + SmallVectorImpl<int> &Mask, bool &IsUnary) { + unsigned NumElems = VT.getVectorNumElements(); + SDValue ImmN; + + IsUnary = false; + switch(N->getOpcode()) { + case X86ISD::SHUFP: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; + case X86ISD::UNPCKH: + DecodeUNPCKHMask(VT, Mask); + break; + case X86ISD::UNPCKL: + DecodeUNPCKLMask(VT, Mask); + break; + case X86ISD::MOVHLPS: + DecodeMOVHLPSMask(NumElems, Mask); + break; + case X86ISD::MOVLHPS: + DecodeMOVLHPSMask(NumElems, Mask); + break; + case X86ISD::PSHUFD: + case X86ISD::VPERMILP: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::PSHUFHW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::PSHUFLW: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + IsUnary = true; + break; + case X86ISD::MOVSS: + case X86ISD::MOVSD: { + // The index 0 always comes from the first element of the second source, + // this is why MOVSS and MOVSD are used in the first place. 
The other + // elements come from the other positions of the first source vector + Mask.push_back(NumElems); + for (unsigned i = 1; i != NumElems; ++i) { + Mask.push_back(i); + } + break; + } + case X86ISD::VPERM2X128: + ImmN = N->getOperand(N->getNumOperands()-1); + DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + break; + case X86ISD::MOVDDUP: + case X86ISD::MOVLHPD: + case X86ISD::MOVLPD: + case X86ISD::MOVLPS: + case X86ISD::MOVSHDUP: + case X86ISD::MOVSLDUP: + case X86ISD::PALIGN: + // Not yet implemented + return false; + default: llvm_unreachable("unknown target shuffle node"); + } + + return true; +} + /// getShuffleScalarElt - Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. -static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, +static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. @@ -4355,89 +4431,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) { - Index = SV->getMaskElt(Index); + int Elt = SV->getMaskElt(Index); - if (Index < 0) + if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); unsigned NumElems = VT.getVectorNumElements(); - SDValue NewV = (Index < (int)NumElems) ? SV->getOperand(0) - : SV->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1); + SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0) + : SV->getOperand(1); + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } // Recurse into target specific vector shuffles to find scalars. if (isTargetShuffle(Opcode)) { unsigned NumElems = VT.getVectorNumElements(); - SmallVector<unsigned, 16> ShuffleMask; + SmallVector<int, 16> ShuffleMask; SDValue ImmN; + bool IsUnary; - switch(Opcode) { - case X86ISD::SHUFP: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::UNPCKH: - DecodeUNPCKHMask(VT, ShuffleMask); - break; - case X86ISD::UNPCKL: - DecodeUNPCKLMask(VT, ShuffleMask); - break; - case X86ISD::MOVHLPS: - DecodeMOVHLPSMask(NumElems, ShuffleMask); - break; - case X86ISD::MOVLHPS: - DecodeMOVLHPSMask(NumElems, ShuffleMask); - break; - case X86ISD::PSHUFD: - case X86ISD::VPERMILP: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PSHUFHW: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::PSHUFLW: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::MOVSS: - case X86ISD::MOVSD: { - // The index 0 always comes from the first element of the second source, - // this is why MOVSS and MOVSD are used in the first place. The other - // elements come from the other positions of the first source vector. - unsigned OpNum = (Index == 0) ? 
1 : 0; - return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG, - Depth+1); - } - case X86ISD::VPERM2X128: - ImmN = N->getOperand(N->getNumOperands()-1); - DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), - ShuffleMask); - break; - case X86ISD::MOVDDUP: - case X86ISD::MOVLHPD: - case X86ISD::MOVLPD: - case X86ISD::MOVLPS: - case X86ISD::MOVSHDUP: - case X86ISD::MOVSLDUP: - case X86ISD::PALIGN: - return SDValue(); // Not yet implemented. - default: llvm_unreachable("unknown target shuffle node"); - } - - Index = ShuffleMask[Index]; - if (Index < 0) + if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary)) + return SDValue(); + + int Elt = ShuffleMask[Index]; + if (Elt < 0) return DAG.getUNDEF(VT.getVectorElementType()); - SDValue NewV = (Index < (int)NumElems) ? N->getOperand(0) + SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0) : N->getOperand(1); - return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, + return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1); } @@ -4453,7 +4474,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) return (Index == 0) ? V.getOperand(0) - : DAG.getUNDEF(VT.getVectorElementType()); + : DAG.getUNDEF(VT.getVectorElementType()); if (V.getOpcode() == ISD::BUILD_VECTOR) return V.getOperand(Index); @@ -4465,38 +4486,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, /// shuffle operation which come from a consecutively from a zero. The /// search can start in two different directions, from left or right. static -unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems, +unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems, bool ZerosFromLeft, SelectionDAG &DAG) { - int i = 0; - - while (i < NumElems) { + unsigned i; + for (i = 0; i != NumElems; ++i) { unsigned Index = ZerosFromLeft ? i : NumElems-i-1; - SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0); + SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0); if (!(Elt.getNode() && (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt)))) break; - ++i; } return i; } -/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to -/// MaskE correspond consecutively to elements from one of the vector operands, +/// isShuffleMaskConsecutive - Check if the shuffle mask indicies [MaskI, MaskE) +/// correspond consecutively to elements from one of the vector operands, /// starting from its index OpIdx. Also tell OpNum which source vector operand. static -bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE, - int OpIdx, int NumElems, unsigned &OpNum) { +bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, + unsigned MaskI, unsigned MaskE, unsigned OpIdx, + unsigned NumElems, unsigned &OpNum) { bool SeenV1 = false; bool SeenV2 = false; - for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) { + for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) { int Idx = SVOp->getMaskElt(i); // Ignore undef indicies if (Idx < 0) continue; - if (Idx < NumElems) + if (Idx < (int)NumElems) SeenV1 = true; else SeenV2 = true; @@ -4531,7 +4551,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, // if (!isShuffleMaskConsecutive(SVOp, 0, // Mask Start Index - NumElems-NumZeros-1, // Mask End Index + NumElems-NumZeros, // Mask End Index(exclusive) NumZeros, // Where to start looking in the src vector NumElems, // Number of elements in vector OpSrc)) // Which source operand ? 
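A minimal standalone sketch of the half-open convention the hunks above switch to: mask indices in [MaskI, MaskE) must map consecutively onto a single source operand starting at OpIdx, with negative entries treated as undef. The helper name maskIsConsecutive and the test values below are illustrative only, not the LLVM implementation.

#include <cassert>
#include <vector>

static bool maskIsConsecutive(const std::vector<int> &Mask,
                              unsigned MaskI, unsigned MaskE,
                              unsigned OpIdx, unsigned NumElems) {
  bool SeenV1 = false, SeenV2 = false;
  for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
    int Idx = Mask[i];
    if (Idx < 0)
      continue;                               // undef lanes match anything
    if (Idx < (int)NumElems)
      SeenV1 = true;
    else
      SeenV2 = true;
    if ((unsigned)Idx % NumElems != OpIdx)    // must stay consecutive
      return false;
  }
  return !(SeenV1 && SeenV2);                 // all defined lanes from one operand
}

int main() {
  // <4 x i32> shuffle mask <2, 3, -1, -1>: lanes [0, 2) come from V1 at 2 and 3.
  std::vector<int> Mask;
  Mask.push_back(2); Mask.push_back(3); Mask.push_back(-1); Mask.push_back(-1);
  assert(maskIsConsecutive(Mask, 0, 2, 2, 4));
  assert(!maskIsConsecutive(Mask, 0, 2, 1, 4));
  return 0;
}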
@@ -4564,7 +4584,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, // if (!isShuffleMaskConsecutive(SVOp, NumZeros, // Mask Start Index - NumElems-1, // Mask End Index + NumElems, // Mask End Index(exclusive) 0, // Where to start looking in the src vector NumElems, // Number of elements in vector OpSrc)) // Which source operand ? @@ -6080,88 +6100,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) { return false; } -/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by -/// a vector extract, and if both can be later optimized into a single load. -/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked -/// here because otherwise a target specific shuffle node is going to be -/// emitted for this shuffle, and the optimization not done. -/// FIXME: This is probably not the best approach, but fix the problem -/// until the right path is decided. -static -bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG, - const TargetLowering &TLI) { - EVT VT = V.getValueType(); - ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V); - - // Be sure that the vector shuffle is present in a pattern like this: - // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr) - if (!V.hasOneUse()) - return false; - - SDNode *N = *V.getNode()->use_begin(); - if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return false; - - SDValue EltNo = N->getOperand(1); - if (!isa<ConstantSDNode>(EltNo)) - return false; - - // If the bit convert changed the number of elements, it is unsafe - // to examine the mask. - bool HasShuffleIntoBitcast = false; - if (V.getOpcode() == ISD::BITCAST) { - EVT SrcVT = V.getOperand(0).getValueType(); - if (SrcVT.getVectorNumElements() != VT.getVectorNumElements()) - return false; - V = V.getOperand(0); - HasShuffleIntoBitcast = true; - } - - // Select the input vector, guarding against out of range extract vector. - unsigned NumElems = VT.getVectorNumElements(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt); - V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1); - - // If we are accessing the upper part of a YMM register - // then the EXTRACT_VECTOR_ELT is likely to be legalized to a sequence of - // EXTRACT_SUBVECTOR + EXTRACT_VECTOR_ELT, which are not detected at this point - // because the legalization of N did not happen yet. - if (Idx >= (int)NumElems/2 && VT.getSizeInBits() == 256) - return false; - - // Skip one more bit_convert if necessary - if (V.getOpcode() == ISD::BITCAST) { - if (!V.hasOneUse()) - return false; - V = V.getOperand(0); - } - - if (!ISD::isNormalLoad(V.getNode())) - return false; - - // Is the original load suitable? - LoadSDNode *LN0 = cast<LoadSDNode>(V); - - if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) - return false; - - if (!HasShuffleIntoBitcast) - return true; - - // If there's a bitcast before the shuffle, check if the load type and - // alignment is valid. 
- unsigned Align = LN0->getAlignment(); - unsigned NewAlign = - TLI.getTargetData()->getABITypeAlignment( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) - return false; - - return true; -} - static SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) { EVT VT = Op.getValueType(); @@ -6282,12 +6220,6 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, if (SVOp->isSplat()) { unsigned NumElem = VT.getVectorNumElements(); int Size = VT.getSizeInBits(); - // Special case, this is the only place now where it's allowed to return - // a vector_shuffle operation without using a target specific node, because - // *hopefully* it will be optimized away by the dag combiner. FIXME: should - // this be moved to DAGCombine instead? - if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI)) - return Op; // Use vbroadcast whenever the splat comes from a foldable load SDValue LD = isVectorBroadcast(Op, Subtarget); @@ -13005,11 +12937,109 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target +/// specific shuffle of a load can be folded into a single element load. +/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but +/// shuffles have been customed lowered so we need to handle those here. +static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue InVec = N->getOperand(0); + SDValue EltNo = N->getOperand(1); + + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + + EVT VT = InVec.getValueType(); + + bool HasShuffleIntoBitcast = false; + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); + if (BCVT.getVectorNumElements() != VT.getVectorNumElements()) + return SDValue(); + InVec = InVec.getOperand(0); + HasShuffleIntoBitcast = true; + } + + if (!isTargetShuffle(InVec.getOpcode())) + return SDValue(); + + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + SmallVector<int, 16> ShuffleMask; + bool UnaryShuffle; + if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle)) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt]; + SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0) + : InVec.getOperand(1); + + // If inputs to shuffle are the same for both ops, then allow 2 uses + unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1; + + if (LdNode.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0)) + return SDValue(); + + AllowedUses = 1; // only allow 1 load use if we have a bitcast + LdNode = LdNode.getOperand(0); + } + + if (!ISD::isNormalLoad(LdNode.getNode())) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(LdNode); + + if (!LN0 ||!LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile()) + return SDValue(); + + if (HasShuffleIntoBitcast) { + // If there's a bitcast before the shuffle, check if the load type and + // alignment is valid. + unsigned Align = LN0->getAlignment(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned NewAlign = TLI.getTargetData()-> + getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT)) + return SDValue(); + } + + // All checks match so transform back to vector_shuffle so that DAG combiner + // can finish the job + DebugLoc dl = N->getDebugLoc(); + + // Create shuffle node taking into account the case that its a unary shuffle + SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1); + Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl, + InVec.getOperand(0), Shuffle, + &ShuffleMask[0]); + Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle, + EltNo); +} + /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index /// generation and convert it from being a bunch of shuffles and extracts /// to a simple store and scalar loads to extract the elements. static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { + TargetLowering::DAGCombinerInfo &DCI) { + SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI); + if (NewOp.getNode()) + return NewOp; + SDValue InputVector = N->getOperand(0); // Only operate on vectors of 4 elements, where the alternative shuffling @@ -13070,6 +13100,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, unsigned EltSize = InputVector.getValueType().getVectorElementType().getSizeInBits()/8; uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy()); SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -13093,6 +13124,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + + DebugLoc DL = N->getDebugLoc(); SDValue Cond = N->getOperand(0); // Get the LHS/RHS of the select. 
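The XFormVExtractWithShuffleIntoLoad combine introduced above, and wired into PerformDAGCombine in the next hunk, rewrites an extract of a target-shuffled load into a single scalar load. A source-level illustration of the equivalence it relies on; the functions are toy names for this sketch, not LLVM APIs:

#include <cassert>

// Extract lane 0 of a shuffle whose lane 0 comes from element 1 of a loaded
// vector; after the combine this amounts to a plain scalar load of p[1].
static float extract_via_shuffle(const float *p) {
  float v[4] = {p[0], p[1], p[2], p[3]};  // vector load
  float s[4] = {v[1], v[1], v[3], v[2]};  // target shuffle, mask <1,1,3,2>
  return s[0];                            // extract_vector_elt, index 0
}

static float extract_direct(const float *p) {
  return p[1];                            // the folded form: one scalar load
}

int main() {
  float data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  assert(extract_via_shuffle(data) == extract_direct(data));
  return 0;
}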
@@ -14897,7 +14930,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case ISD::EXTRACT_VECTOR_ELT: - return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this); + return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI); case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index ac49232..42a5014 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -153,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)]>; + [(X86ehret GR32:$addr)], IIC_RET>; } @@ -161,7 +161,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)]>; + [(X86ehret GR64:$addr)], IIC_RET>; } @@ -193,7 +193,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)]>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -202,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)]>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)]>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -218,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)]>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -226,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "", [(set GR64:$dst, i64immZExt32:$src)]>; + "", [(set GR64:$dst, i64immZExt32:$src)], + IIC_ALU_NONMEM>; // Use sbb to materialize carry bit. let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { @@ -236,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { // FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces // X86CodeEmitter. 
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", - [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>, + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>, OpSize; def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))], + IIC_ALU_NONMEM>; } // isCodeGenOnly @@ -297,32 +302,32 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), // let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", - [(X86rep_movs i8)]>, REP; + [(X86rep_movs i8)], IIC_REP_MOVS>, REP; def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}", - [(X86rep_movs i16)]>, REP, OpSize; + [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize; def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}", - [(X86rep_movs i32)]>, REP; + [(X86rep_movs i32)], IIC_REP_MOVS>, REP; } let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}", - [(X86rep_movs i64)]>, REP; + [(X86rep_movs i64)], IIC_REP_MOVS>, REP; // FIXME: Should use "(X86rep_stos AL)" as the pattern. let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}", - [(X86rep_stos i8)]>, REP; + [(X86rep_stos i8)], IIC_REP_STOS>, REP; let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}", - [(X86rep_stos i16)]>, REP, OpSize; + [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize; let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", - [(X86rep_stos i32)]>, REP; + [(X86rep_stos i32)], IIC_REP_STOS>, REP; let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", - [(X86rep_stos i64)]>, REP; + [(X86rep_stos i64)], IIC_REP_STOS>, REP; //===----------------------------------------------------------------------===// @@ -571,7 +576,7 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", - []>, Requires<[In32BitMode]>, LOCK; + [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), @@ -591,72 +596,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), !strconcat("lock\n\t", mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, OpSize, LOCK; + [], IIC_ALU_NONMEM>, OpSize, LOCK; def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, 
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_NONMEM>, LOCK; def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{b}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{w}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), !strconcat("lock\n\t", mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), - []>, LOCK; + [], IIC_ALU_MEM>, LOCK; } @@ -673,29 +678,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "lock\n\t" - "inc{b}\t$dst", []>, LOCK; + "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "lock\n\t" - "inc{w}\t$dst", []>, OpSize, LOCK; + "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "lock\n\t" - "inc{l}\t$dst", []>, LOCK; + "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "lock\n\t" - "inc{q}\t$dst", []>, LOCK; + "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "lock\n\t" - "dec{b}\t$dst", []>, LOCK; + "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "lock\n\t" - "dec{w}\t$dst", []>, OpSize, LOCK; + "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "lock\n\t" - "dec{l}\t$dst", []>, LOCK; + "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins 
i64mem:$dst), "lock\n\t" - "dec{q}\t$dst", []>, LOCK; + "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; } // Atomic compare and swap. @@ -704,42 +709,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), "lock\n\t" "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)]>, TB, LOCK; + [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK; let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], isCodeGenOnly = 1 in def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), "lock\n\t" "cmpxchg16b\t$ptr", - [(X86cas16 addr:$ptr)]>, TB, LOCK, + [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK, Requires<[HasCmpxchg16b]>; let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), "lock\n\t" "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK; + [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK; } let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), "lock\n\t" "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK; + [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK; } let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), "lock\n\t" "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK; + [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK; } let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), "lock\n\t" "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK; + [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK; } // Atomic exchange and add @@ -747,22 +752,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), "lock\n\t" "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>, + [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))], + IIC_XADD_LOCK_MEM8>, TB, LOCK; def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), "lock\n\t" "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>, + [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))], + IIC_XADD_LOCK_MEM>, TB, OpSize, LOCK; def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), "lock\n\t" "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>, + [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))], + IIC_XADD_LOCK_MEM>, TB, LOCK; def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), "lock\n\t" "xadd{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>, + [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))], + IIC_XADD_LOCK_MEM>, TB, LOCK; } diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 4f9f089..ae3ed1b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -218,6 +218,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 
16; }]>; +// Like 'X86vzload', but always requires 128-bit vector alignment. +def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{ + return cast<MemSDNode>(N)->getAlignment() >= 16; +}]>; + // Like 'load', but always requires 256-bit vector alignment. def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 32; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5a479f0..307c96b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/MC/MCAsmInfo.h" #include <limits> #define GET_INSTRINFO_CTOR @@ -82,6 +82,12 @@ enum { TB_FOLDED_STORE = 1 << 19 }; +struct X86OpTblEntry { + uint16_t RegOp; + uint16_t MemOp; + uint32_t Flags; +}; + X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() ? X86::ADJCALLSTACKDOWN64 @@ -91,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) : X86::ADJCALLSTACKUP32)), TM(tm), RI(tm, *this) { - static const unsigned OpTbl2Addr[][3] = { + static const X86OpTblEntry OpTbl2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, { X86::ADC32ri8, X86::ADC32mi8, 0 }, { X86::ADC32rr, X86::ADC32mr, 0 }, @@ -259,16 +265,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) { - unsigned RegOp = OpTbl2Addr[i][0]; - unsigned MemOp = OpTbl2Addr[i][1]; - unsigned Flags = OpTbl2Addr[i][2]; + unsigned RegOp = OpTbl2Addr[i].RegOp; + unsigned MemOp = OpTbl2Addr[i].MemOp; + unsigned Flags = OpTbl2Addr[i].Flags; AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, RegOp, MemOp, // Index 0, folded load and store, no alignment requirement. 
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } - static const unsigned OpTbl0[][3] = { + static const X86OpTblEntry OpTbl0[] = { { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, @@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) { - unsigned RegOp = OpTbl0[i][0]; - unsigned MemOp = OpTbl0[i][1]; - unsigned Flags = OpTbl0[i][2]; + unsigned RegOp = OpTbl0[i].RegOp; + unsigned MemOp = OpTbl0[i].MemOp; + unsigned Flags = OpTbl0[i].Flags; AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, RegOp, MemOp, TB_INDEX_0 | Flags); } - static const unsigned OpTbl1[][3] = { + static const X86OpTblEntry OpTbl1[] = { { X86::CMP16rr, X86::CMP16rm, 0 }, { X86::CMP32rr, X86::CMP32rm, 0 }, { X86::CMP64rr, X86::CMP64rm, 0 }, @@ -555,16 +561,16 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { - unsigned RegOp = OpTbl1[i][0]; - unsigned MemOp = OpTbl1[i][1]; - unsigned Flags = OpTbl1[i][2]; + unsigned RegOp = OpTbl1[i].RegOp; + unsigned MemOp = OpTbl1[i].MemOp; + unsigned Flags = OpTbl1[i].Flags; AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, RegOp, MemOp, // Index 1, folded load Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } - static const unsigned OpTbl2[][3] = { + static const X86OpTblEntry OpTbl2[] = { { X86::ADC32rr, X86::ADC32rm, 0 }, { X86::ADC64rr, X86::ADC64rm, 0 }, { X86::ADD16rr, X86::ADD16rm, 0 }, @@ -1108,9 +1114,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) }; for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) { - unsigned RegOp = OpTbl2[i][0]; - unsigned MemOp = OpTbl2[i][1]; - unsigned Flags = OpTbl2[i][2]; + unsigned RegOp = OpTbl2[i].RegOp; + unsigned MemOp = OpTbl2[i].MemOp; + unsigned Flags = OpTbl2[i].Flags; AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, RegOp, MemOp, // Index 2, folded load @@ -3627,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { // These are the replaceable SSE instructions. Some of these have Int variants // that we don't include here. We don't want to replace instructions selected // by intrinsics. -static const unsigned ReplaceableInstrs[][3] = { +static const uint16_t ReplaceableInstrs[][3] = { //PackedSingle PackedDouble PackedInt { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr }, { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm }, @@ -3667,7 +3673,7 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr } }; -static const unsigned ReplaceableInstrsAVX2[][3] = { +static const uint16_t ReplaceableInstrsAVX2[][3] = { //PackedSingle PackedDouble PackedInt { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, @@ -3688,14 +3694,14 @@ static const unsigned ReplaceableInstrsAVX2[][3] = { // FIXME: Some shuffle and unpack instructions have equivalents in different // domains, but they require a bit more work than just switching opcodes. 
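A small standalone sketch of the layout change behind the X86OpTblEntry and uint16_t table edits above: storing the register and memory opcodes as 16-bit fields shrinks each folding-table entry to eight bytes instead of three full unsigned values, while lookups stay a simple linear scan. OpTblEntry and lookupByRegOp below are illustrative stand-ins, not the real tables.

#include <cstdint>
#include <cstdio>

struct OpTblEntry {
  uint16_t RegOp;   // register-form opcode
  uint16_t MemOp;   // memory (folded) form opcode
  uint32_t Flags;   // folding flags
};

static const OpTblEntry Table[] = {
  { 100, 200, 0x1 },
  { 101, 201, 0x2 },
};

static const OpTblEntry *lookupByRegOp(unsigned RegOp) {
  for (unsigned i = 0, e = sizeof(Table) / sizeof(Table[0]); i != e; ++i)
    if (Table[i].RegOp == RegOp)
      return &Table[i];
  return 0;   // not found
}

int main() {
  std::printf("entry size: %u bytes\n", (unsigned)sizeof(OpTblEntry)); // 8, not 12
  return lookupByRegOp(101) ? 0 : 1;
}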
-static const unsigned *lookup(unsigned opcode, unsigned domain) { +static const uint16_t *lookup(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) if (ReplaceableInstrs[i][domain-1] == opcode) return ReplaceableInstrs[i]; return 0; } -static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) { +static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) if (ReplaceableInstrsAVX2[i][domain-1] == opcode) return ReplaceableInstrsAVX2[i]; @@ -3718,7 +3724,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(Domain>0 && Domain<4 && "Invalid execution domain"); uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); - const unsigned *table = lookup(MI->getOpcode(), dom); + const uint16_t *table = lookup(MI->getOpcode(), dom); if (!table) { // try the other table assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && "256-bit vector operations only available in AVX2"); diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index d065d2d..b23d756 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -14,10 +14,10 @@ #ifndef X86INSTRUCTIONINFO_H #define X86INSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "X86.h" #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "X86GenInstrInfo.inc" diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f585b47..dd7cf50 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1856,19 +1856,19 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>; def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; // shld/shrd op,op -> shld op, op, CL -def : InstAlias<"shldw $r1, $r2", (SHLD16rrCL GR16:$r1, GR16:$r2)>; -def : InstAlias<"shldl $r1, $r2", (SHLD32rrCL GR32:$r1, GR32:$r2)>; -def : InstAlias<"shldq $r1, $r2", (SHLD64rrCL GR64:$r1, GR64:$r2)>; -def : InstAlias<"shrdw $r1, $r2", (SHRD16rrCL GR16:$r1, GR16:$r2)>; -def : InstAlias<"shrdl $r1, $r2", (SHRD32rrCL GR32:$r1, GR32:$r2)>; -def : InstAlias<"shrdq $r1, $r2", (SHRD64rrCL GR64:$r1, GR64:$r2)>; - -def : InstAlias<"shldw $mem, $reg", (SHLD16mrCL i16mem:$mem, GR16:$reg)>; -def : InstAlias<"shldl $mem, $reg", (SHLD32mrCL i32mem:$mem, GR32:$reg)>; -def : InstAlias<"shldq $mem, $reg", (SHLD64mrCL i64mem:$mem, GR64:$reg)>; -def : InstAlias<"shrdw $mem, $reg", (SHRD16mrCL i16mem:$mem, GR16:$reg)>; -def : InstAlias<"shrdl $mem, $reg", (SHRD32mrCL i32mem:$mem, GR32:$reg)>; -def : InstAlias<"shrdq $mem, $reg", (SHRD64mrCL i64mem:$mem, GR64:$reg)>; +def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>; +def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>; +def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>; +def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>; + +def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>; +def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>; +def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>; +def : 
InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>; +def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>; /* FIXME: This is disabled because the asm matcher is currently incapable of * matching a fixed immediate like $1. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c6d1d19..df42627 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -641,7 +641,7 @@ let Predicates = [HasAVX] in { (VMOVSDrr (v2i64 (V_SET0)), (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; -// Extract and store. + // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSSmr addr:$dst, @@ -2306,7 +2306,7 @@ let Defs = [EFLAGS] in { "comisd", SSEPackedDouble>, TB, OpSize; } // Defs = [EFLAGS] -// sse12_cmp_packed - sse 1 & 2 compared packed instructions +// sse12_cmp_packed - sse 1 & 2 compare packed instructions multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, string asm, string asm_alt, Domain d> { @@ -4820,8 +4820,10 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in { } let Predicates = [HasAVX] in { -def : Pat<(v4i64 (X86vzload addr:$src)), +def : Pat<(v4i64 (alignedX86vzload addr:$src)), (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; +def : Pat<(v4i64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>; } //===---------------------------------------------------------------------===// @@ -7307,6 +7309,24 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), []>, VEX; } +// Extract and store. +let Predicates = [HasAVX] in { + def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + + def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; + def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))), + (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>; +} + +// AVX1 patterns let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; @@ -7314,6 +7334,31 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; + +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v4f32 (VEXTRACTF128rr + (v8f32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2f64 (VEXTRACTF128rr + (v4f64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v2i64 (VEXTRACTF128rr + (v4i64 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 
imm)), + (v4i32 (VEXTRACTF128rr + (v8i32 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v8i16 (VEXTRACTF128rr + (v16i16 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; +def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), + (v16i8 (VEXTRACTF128rr + (v32i8 VR256:$src1), + (EXTRACT_get_vextractf128_imm VR128:$ext)))>; } //===----------------------------------------------------------------------===// @@ -7711,7 +7756,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, VEX_4V; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2], AddedComplexity = 1 in { def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), (i32 imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, @@ -7756,6 +7801,19 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), (i32 imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, (INSERT_get_vinsertf128_imm VR256:$ins))>; + +def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; +def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), + (i32 imm)), + (VINSERTF128rm VR256:$src1, addr:$src2, + (INSERT_get_vinsertf128_imm VR256:$ins))>; } //===----------------------------------------------------------------------===// @@ -7791,34 +7849,6 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), (EXTRACT_get_vextractf128_imm VR128:$ext)))>; } -// AVX1 patterns -let Predicates = [HasAVX] in { -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4f32 (VEXTRACTF128rr - (v8f32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2f64 (VEXTRACTF128rr - (v4f64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v2i64 (VEXTRACTF128rr - (v4i64 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v4i32 (VEXTRACTF128rr - (v8i32 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v8i16 (VEXTRACTF128rr - (v16i16 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), - (v16i8 (VEXTRACTF128rr - (v32i8 VR256:$src1), - (EXTRACT_get_vextractf128_imm VR128:$ext)))>; -} - //===----------------------------------------------------------------------===// // VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores // diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 8843848..bddba6c 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -45,17 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; -def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; -def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, +def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, 
TB; +def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB, Requires<[In64BitMode]>; def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; -def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB; -def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB, +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB; +def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB, Requires<[In64BitMode]>; -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize; +def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index a7a5c56..b578e8d 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -12,10 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "InstPrinter/X86ATTInstPrinter.h" #include "X86MCInstLower.h" #include "X86AsmPrinter.h" #include "X86COFFMachineModuleInfo.h" +#include "InstPrinter/X86ATTInstPrinter.h" +#include "llvm/Type.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -26,7 +27,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Type.h" using namespace llvm; X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 93e2744..b56025f 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#include "X86.h" #include "X86RegisterInfo.h" +#include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d6d0149..17f4efd 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -114,6 +114,9 @@ def IIC_MOVZX : InstrItinClass; def IIC_MOVZX_R16_R8 : InstrItinClass; def IIC_MOVZX_R16_M8 : InstrItinClass; +def IIC_REP_MOVS : InstrItinClass; +def IIC_REP_STOS : InstrItinClass; + // SSE scalar/parallel binary operations def IIC_SSE_ALU_F32S_RR : InstrItinClass; def IIC_SSE_ALU_F32S_RM : InstrItinClass; @@ -250,6 +253,14 @@ def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass; def IIC_SSE_CVT_SD2SI_RM : InstrItinClass; def IIC_SSE_CVT_SD2SI_RR : InstrItinClass; +def IIC_CMPX_LOCK : InstrItinClass; +def IIC_CMPX_LOCK_8 : InstrItinClass; +def IIC_CMPX_LOCK_8B : InstrItinClass; +def IIC_CMPX_LOCK_16B : InstrItinClass; + +def IIC_XADD_LOCK_MEM : InstrItinClass; +def IIC_XADD_LOCK_MEM8 : InstrItinClass; + //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
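The itinerary classes added above (IIC_REP_MOVS, IIC_REP_STOS, IIC_CMPX_LOCK*, IIC_XADD_LOCK_MEM*) give scheduling data to string and locked instructions that previously had none. At the source level these roughly correspond to constructs like the following sketch, which uses GCC/Clang __sync builtins; the exact instruction selection is target and flag dependent:

#include <cstdio>
#include <cstring>

int main() {
  // Large memcpy/memset calls can lower to rep movs / rep stos.
  char src[64] = "hello", dst[64];
  std::memcpy(dst, src, sizeof(src));

  // Atomic fetch-and-add typically lowers to lock xadd.
  int counter = 0;
  int old = __sync_fetch_and_add(&counter, 1);

  // Atomic compare-and-swap typically lowers to lock cmpxchg.
  bool swapped = __sync_bool_compare_and_swap(&counter, 1, 2);

  std::printf("%s %d %d %d\n", dst, old, counter, (int)swapped);
  return 0;
}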
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index e8cf72a..77d4e56 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -144,6 +144,9 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >, InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >, + InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >, + InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >, + // SSE binary operations // arithmetic fp scalar InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >, @@ -289,6 +292,14 @@ def AtomItineraries : ProcessorItineraries< InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >, InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >, InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >, - InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] > -]>; + InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >, + + InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >, + InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >, + + InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >, + InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<3, [Port0, Port1]>] > + ]>; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a36d0d8..7fd832b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -14,9 +14,9 @@ #ifndef X86SUBTARGET_H #define X86SUBTARGET_H +#include "llvm/CallingConv.h" #include "llvm/ADT/Triple.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/CallingConv.h" #include <string> #define GET_SUBTARGETINFO_HEADER diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 143caba..8e935af 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -28,7 +28,6 @@ namespace llvm { -class formatted_raw_ostream; class StringRef; class X86TargetMachine : public LLVMTargetMachine { diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index ceb7a4a..a02a368 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -15,7 +15,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - class X86TargetMachine; /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin /// x86-64. 
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 4d8ef74..50fda58 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "XCore.h" #include "XCoreFrameLowering.h" +#include "XCore.h" #include "XCoreInstrInfo.h" #include "XCoreMachineFunctionInfo.h" #include "llvm/Function.h" diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index c2d2a5d..593cebc 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -1152,7 +1152,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, if (isVarArg) { /* Argument registers */ - static const unsigned ArgRegs[] = { + static const uint16_t ArgRegs[] = { XCore::R0, XCore::R1, XCore::R2, XCore::R3 }; XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f5a6822..5cd3e67 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -15,9 +15,9 @@ #ifndef XCOREISELLOWERING_H #define XCOREISELLOWERING_H +#include "XCore.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" -#include "XCore.h" namespace llvm { diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index f930623..0a3008d 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "XCoreMachineFunctionInfo.h" #include "XCoreInstrInfo.h" +#include "XCoreMachineFunctionInfo.h" #include "XCore.h" #include "llvm/MC/MCContext.h" #include "llvm/CodeGen/MachineInstrBuilder.h" diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index e47d212..42eeed8 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -14,8 +14,8 @@ #ifndef XCOREINSTRUCTIONINFO_H #define XCOREINSTRUCTIONINFO_H -#include "llvm/Target/TargetInstrInfo.h" #include "XCoreRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER #include "XCoreGenInstrInfo.inc" diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 8730282..f3b4b4c 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -14,6 +14,8 @@ #include "XCoreRegisterInfo.h" #include "XCoreMachineFunctionInfo.h" #include "XCore.h" +#include "llvm/Type.h" +#include "llvm/Function.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,8 +26,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" -#include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -54,20 +54,6 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } -static const unsigned XCore_ArgRegs[] = { - XCore::R0, XCore::R1, XCore::R2, XCore::R3 -}; - -const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF) -{ - return XCore_ArgRegs; -} - -unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF) -{ - return array_lengthof(XCore_ArgRegs); 
-} - bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { return MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index ab6ce56..7391cfd 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -62,15 +62,6 @@ public: // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; - //! Return the array of argument passing registers - /*! - \note The size of this array is returned by getArgRegsSize(). - */ - static const unsigned *getArgRegs(const MachineFunction *MF = 0); - - //! Return the size of the argument passing register array - static unsigned getNumArgRegs(const MachineFunction *MF = 0); - //! Return whether to emit frame moves static bool needsFrameMoves(const MachineFunction &MF); }; diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 2c174f4..2546681 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -14,13 +14,13 @@ #ifndef XCORETARGETMACHINE_H #define XCORETARGETMACHINE_H -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" #include "XCoreFrameLowering.h" #include "XCoreSubtarget.h" #include "XCoreInstrInfo.h" #include "XCoreISelLowering.h" #include "XCoreSelectionDAGInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" namespace llvm { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 58ab567..a32e550 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -2561,11 +2561,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; delete ValueStack.pop_back_val(); InstResult = RetVal; - - if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { - NextBB = II->getNormalDest(); - return true; - } } } else if (isa<TerminatorInst>(CurInst)) { if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { @@ -2582,8 +2577,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, ConstantInt *Val = dyn_cast<ConstantInt>(getVal(SI->getCondition())); if (!Val) return false; // Cannot determine. - unsigned ValTISucc = SI->resolveSuccessorIndex(SI->findCaseValue(Val)); - NextBB = SI->getSuccessor(ValTISucc); + NextBB = SI->findCaseValue(Val).getCaseSuccessor(); } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) @@ -2611,6 +2605,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, setVal(CurInst, InstResult); } + // If we just processed an invoke, we finished evaluating the block. + if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { + NextBB = II->getNormalDest(); + return true; + } + // Advance program counter. ++CurInst; } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 749a3fa..3c7fac6 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -32,8 +32,6 @@ namespace { // AlwaysInliner only inlines functions that are mark as "always inline". class AlwaysInliner : public Inliner { - // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: // Use extremely low threshold. 
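For context on the AlwaysInliner changes above and in the next hunk: with the precomputed NeverInline set removed, the pass decides per call site by checking the always-inline attribute on the callee. A minimal source-level illustration, assuming the GCC/Clang attribute spelling below:

#include <cstdio>

// always_inline requests inlining regardless of the usual cost model, which is
// what the AlwaysInliner pass implements at the IR level.
__attribute__((always_inline)) inline int square(int x) { return x * x; }

int main() {
  std::printf("%d\n", square(7));  // expected to be inlined even at low -O levels
  return 0;
}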
@@ -46,7 +44,22 @@ namespace { } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { - return CA.getInlineCost(CS, NeverInline); + Function *Callee = CS.getCalledFunction(); + // We assume indirect calls aren't calling an always-inline function. + if (!Callee) return InlineCost::getNever(); + + // We can't inline calls to external functions. + // FIXME: We shouldn't even get here. + if (Callee->isDeclaration()) return InlineCost::getNever(); + + // Return never for anything not marked as always inline. + if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + return InlineCost::getNever(); + + // We still have to check the inline cost in case there are reasons to + // not inline which trump the always-inline attribute such as setjmp and + // indirectbr. + return CA.getInlineCost(CS); } float getInlineFudgeFactor(CallSite CS) { return CA.getInlineFudgeFactor(CS); @@ -58,7 +71,7 @@ namespace { CA.growCachedCostInfo(Caller, Callee); } virtual bool doFinalization(CallGraph &CG) { - return removeDeadFunctions(CG, &NeverInline); + return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true); } virtual bool doInitialization(CallGraph &CG); void releaseMemory() { @@ -84,12 +97,5 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { // been annotated with the "always inline" attribute. bool AlwaysInliner::doInitialization(CallGraph &CG) { CA.setTargetData(getAnalysisIfAvailable<TargetData>()); - - Module &M = CG.getModule(); - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline)) - NeverInline.insert(I); - return false; } diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index b3421eb..03032e6 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -23,15 +23,12 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Target/TargetData.h" -#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; namespace { class SimpleInliner : public Inliner { - // Functions that are never inlined - SmallPtrSet<const Function*, 16> NeverInline; InlineCostAnalyzer CA; public: SimpleInliner() : Inliner(ID) { @@ -43,7 +40,7 @@ namespace { } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) { - return CA.getInlineCost(CS, NeverInline); + return CA.getInlineCost(CS); } float getInlineFudgeFactor(CallSite CS) { return CA.getInlineFudgeFactor(CS); @@ -78,44 +75,6 @@ Pass *llvm::createFunctionInliningPass(int Threshold) { // annotated with the noinline attribute. 
bool SimpleInliner::doInitialization(CallGraph &CG) { CA.setTargetData(getAnalysisIfAvailable<TargetData>()); - - Module &M = CG.getModule(); - - for (Module::iterator I = M.begin(), E = M.end(); - I != E; ++I) - if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline)) - NeverInline.insert(I); - - // Get llvm.noinline - GlobalVariable *GV = M.getNamedGlobal("llvm.noinline"); - - if (GV == 0) - return false; - - // Don't crash on invalid code - if (!GV->hasDefinitiveInitializer()) - return false; - - const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - - if (InitList == 0) - return false; - - // Iterate over each element and add to the NeverInline set - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - - // Get Source - const Constant *Elt = InitList->getOperand(i); - - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt)) - if (CE->getOpcode() == Instruction::BitCast) - Elt = CE->getOperand(0); - - // Insert into set of functions to never inline - if (const Function *F = dyn_cast<Function>(Elt)) - NeverInline.insert(F); - } - return false; } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 1f7625d..9975333 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -19,6 +19,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -243,13 +244,20 @@ bool Inliner::shouldInline(CallSite CS) { return false; } - // Try to detect the case where the current inlining candidate caller - // (call it B) is a static function and is an inlining candidate elsewhere, - // and the current candidate callee (call it C) is large enough that - // inlining it into B would make B too big to inline later. In these - // circumstances it may be best not to inline C into B, but to inline B - // into its callers. - if (Caller->hasLocalLinkage()) { + // Try to detect the case where the current inlining candidate caller (call + // it B) is a static or linkonce-ODR function and is an inlining candidate + // elsewhere, and the current candidate callee (call it C) is large enough + // that inlining it into B would make B too big to inline later. In these + // circumstances it may be best not to inline C into B, but to inline B into + // its callers. + // + // This only applies to static and linkonce-ODR functions because those are + // expected to be available for inlining in the translation units where they + // are used. Thus we will always have the opportunity to make local inlining + // decisions. Importantly the linkonce-ODR linkage covers inline functions + // and templates in C++. + if (Caller->hasLocalLinkage() || + Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) { int TotalSecondaryCost = 0; bool outerCallsFound = false; // This bool tracks what happens if we do NOT inline C into B. @@ -327,6 +335,37 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, return false; } +/// \brief Simplify arguments going into a particular callsite. +/// +/// This is important to do each time we add a callsite due to inlining so that +/// constants and other entities which feed into inline cost estimation are +/// properly recognized when analyzing the new callsite. Consider: +/// void outer(int x) { +/// if (x < 42) +/// return inner(42 - x); +/// ... 
+/// } +/// void inner(int x) { +/// ... +/// } +/// +/// The inliner gives calls to 'outer' with a constant argument a bonus because +/// it will delete one side of a branch. But the resulting call to 'inner' +/// will, after inlining, also have a constant operand. We need to do just +/// enough constant folding to expose this for callsite arguments. The rest +/// will be taken care of after the inliner finishes running. +static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) { + // FIXME: It would be nice to avoid this smallvector if RAUW doesn't + // invalidate operand iterators in any cases. + SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (Instruction *Inst = dyn_cast<Instruction>(*I)) + if (Value *SimpleArg = SimplifyInstruction(Inst, TD)) + SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg)); + for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx) + SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second); +} bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis<CallGraph>(); @@ -455,7 +494,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); i != e; ++i) { Value *Ptr = InlineInfo.InlinedCalls[i]; - CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); + CallSite NewCS = Ptr; + simplifyCallSiteArguments(TD, NewCS); + CallSites.push_back(std::make_pair(NewCS, NewHistoryID)); } } @@ -515,25 +556,27 @@ bool Inliner::doFinalization(CallGraph &CG) { /// removeDeadFunctions - Remove dead functions that are not included in /// DNR (Do Not Remove) list. -bool Inliner::removeDeadFunctions(CallGraph &CG, - SmallPtrSet<const Function *, 16> *DNR) { - SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove; +bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { + SmallVector<CallGraphNode*, 16> FunctionsToRemove; // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) { CallGraphNode *CGN = I->second; - if (CGN->getFunction() == 0) - continue; - Function *F = CGN->getFunction(); - + if (!F || F->isDeclaration()) + continue; + + // Handle the case when this function is called and we only want to care + // about always-inline functions. This is a bit of a hack to share code + // between here and the InlineAlways pass. + if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + continue; + // If the only remaining users of the function are dead constants, remove // them. F->removeDeadConstantUsers(); - if (DNR && DNR->count(F)) - continue; if (!F->isDefTriviallyDead()) continue; @@ -546,24 +589,28 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); // Removing the node for callee from the call graph and delete it. - FunctionsToRemove.insert(CGN); + FunctionsToRemove.push_back(CGN); } + if (FunctionsToRemove.empty()) + return false; // Now that we know which functions to delete, do so. We didn't want to do // this inline, because that would invalidate our CallGraph::iterator // objects. 
:( // - // Note that it doesn't matter that we are iterating over a non-stable set + // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. - bool Changed = false; - for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(), - E = FunctionsToRemove.end(); I != E; ++I) { + std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); + FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(), + FunctionsToRemove.end()), + FunctionsToRemove.end()); + for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(), + E = FunctionsToRemove.end(); + I != E; ++I) { resetCachedCostInfo((*I)->getFunction()); delete CG.removeFunctionFromModule(*I); ++NumDeleted; - Changed = true; } - - return Changed; + return true; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index cc8f5bf..1165660 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1929,8 +1929,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // Canonicalize xor to the RHS. - if (match(Op0, m_Xor(m_Value(), m_Value()))) + bool SwappedForXor = false; + if (match(Op0, m_Xor(m_Value(), m_Value()))) { std::swap(Op0, Op1); + SwappedForXor = true; + } // A | ( A ^ B) -> A | B // A | (~A ^ B) -> A | ~B @@ -1961,6 +1964,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(Not, Op0); } + if (SwappedForXor) + std::swap(Op0, Op1); + if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 32009c3..99a02fc 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -55,9 +55,9 @@ public: Worklist.reserve(NumEntries+16); WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (; NumEntries; --NumEntries) { + for (unsigned Idx = 0; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; - WorklistMap.insert(std::make_pair(I, Worklist.size())); + WorklistMap.insert(std::make_pair(I, Idx++)); Worklist.push_back(I); } } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 318256a..349ba83 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1245,15 +1245,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { if (I->getOpcode() == Instruction::Add) if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) { // change 'switch (X+4) case 1:' into 'switch (X) case -3' - unsigned NumCases = SI.getNumCases(); // Skip the first item since that's the default case. 
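The Inliner::removeDeadFunctions rewrite above trades a SmallPtrSet for a SmallVector plus an explicit sort-and-unique pass before deletion; a minimal sketch of that dedup idiom with standard containers, the element type chosen only for illustration:

#include <algorithm>
#include <vector>

// Sort, drop adjacent duplicates, then erase the leftover tail: each element
// survives exactly once and the processing order becomes deterministic.
static void dedup(std::vector<int> &V) {
  std::sort(V.begin(), V.end());
  V.erase(std::unique(V.begin(), V.end()), V.end());
}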
- for (unsigned i = 0; i < NumCases; ++i) { - ConstantInt* CaseVal = SI.getCaseValue(i); + for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + ConstantInt* CaseVal = i.getCaseValue(); Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal), AddRHS); assert(isa<ConstantInt>(NewCaseVal) && "Result of expression should be constant"); - SI.setCaseValue(i, cast<ConstantInt>(NewCaseVal)); + i.setValue(cast<ConstantInt>(NewCaseVal)); } SI.setCondition(I->getOperand(0)); Worklist.Add(I); @@ -1873,9 +1873,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) { // See if this is an explicit destination. - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) - if (SI->getCaseValue(i) == Cond) { - BasicBlock *ReachableBB = SI->getCaseSuccessor(i); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) + if (i.getCaseValue() == Cond) { + BasicBlock *ReachableBB = i.getCaseSuccessor(); Worklist.push_back(ReachableBB); continue; } diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 123e399..b43b9e5 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asan" +#include "FunctionBlackList.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" @@ -29,8 +30,6 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include "llvm/Target/TargetData.h" @@ -126,21 +125,6 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { -// Blacklisted functions are not instrumented. -// The blacklist file contains one or more lines like this: -// --- -// fun:FunctionWildCard -// --- -// This is similar to the "ignore" feature of ThreadSanitizer. -// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores -class BlackList { - public: - BlackList(const std::string &Path); - bool isIn(const Function &F); - private: - Regex *Functions; -}; - /// AddressSanitizer: instrument the code in module to find memory bugs. 
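Most of the switch-related hunks above and below replace index-based case access (getNumCases()/getCaseValue(i)/getCaseSuccessor(i)) with the SwitchInst::CaseIt iterator; a hedged sketch of the new-style loop, restricted to accessors that appear in this diff, with the function itself a made-up example:

#include "llvm/Instructions.h"
using namespace llvm;

// Visit every explicit (non-default) case of a switch. The iterator carries
// the case value, the successor block, and the case index together, replacing
// the parallel unsigned indexing used in the removed lines.
static void visitCases(SwitchInst *SI) {
  for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) {
    ConstantInt *CaseVal = i.getCaseValue();
    BasicBlock *Dest = i.getCaseSuccessor();
    (void)CaseVal;
    (void)Dest;
  }
}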
struct AddressSanitizer : public ModulePass { AddressSanitizer(); @@ -195,7 +179,7 @@ struct AddressSanitizer : public ModulePass { Function *AsanCtorFunction; Function *AsanInitFunction; Instruction *CtorInsertBefore; - OwningPtr<BlackList> BL; + OwningPtr<FunctionBlackList> BL; }; } // namespace @@ -470,7 +454,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); Type *Ty = PtrTy->getElementType(); - uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8; + uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); @@ -551,7 +535,7 @@ bool AddressSanitizer::runOnModule(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; - BL.reset(new BlackList(ClBlackListFile)); + BL.reset(new FunctionBlackList(ClBlackListFile)); CurrentModule = &M; C = &(M.getContext()); @@ -595,18 +579,23 @@ bool AddressSanitizer::runOnModule(Module &M) { if (ClGlobals) Res |= insertGlobalRedzones(M); - // Tell the run-time the current values of mapping offset and scale. - GlobalValue *asan_mapping_offset = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingOffset), - kAsanMappingOffsetName); - GlobalValue *asan_mapping_scale = - new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, - ConstantInt::get(IntptrTy, MappingScale), - kAsanMappingScaleName); - // Read these globals, otherwise they may be optimized away. - IRB.CreateLoad(asan_mapping_scale, true); - IRB.CreateLoad(asan_mapping_offset, true); + if (ClMappingOffsetLog >= 0) { + // Tell the run-time the current values of mapping offset and scale. + GlobalValue *asan_mapping_offset = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, MappingOffset), + kAsanMappingOffsetName); + // Read the global, otherwise it may be optimized away. + IRB.CreateLoad(asan_mapping_offset, true); + } + if (ClMappingScale) { + GlobalValue *asan_mapping_scale = + new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage, + ConstantInt::get(IntptrTy, MappingScale), + kAsanMappingScaleName); + // Read the global, otherwise it may be optimized away. 
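The insertGlobalRedzones hunk above sizes globals with getTypeAllocSize, which includes tail padding, rather than dividing the store size in bits by eight; the right-redzone arithmetic itself is unchanged. A standalone illustration of that arithmetic with made-up sizes:

#include <cstdint>
#include <cstdio>

// The right redzone pads the global up to the next RedzoneSize boundary and
// then adds one more full redzone, so it is always between RedzoneSize+1 and
// 2*RedzoneSize bytes.
int main() {
  const uint64_t RedzoneSize = 64;
  const uint64_t Sizes[] = {1, 24, 64, 100};
  for (unsigned i = 0; i != 4; ++i) {
    uint64_t SizeInBytes = Sizes[i];
    uint64_t RightRedzoneSize =
        RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize));
    std::printf("size %3llu -> right redzone %llu\n",
                (unsigned long long)SizeInBytes,
                (unsigned long long)RightRedzoneSize);
  }
  return 0;
}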
+ IRB.CreateLoad(asan_mapping_scale, true); + } for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { @@ -946,54 +935,3 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) { return true; } - -BlackList::BlackList(const std::string &Path) { - Functions = NULL; - const char *kFunPrefix = "fun:"; - if (!ClBlackListFile.size()) return; - std::string Fun; - - OwningPtr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(ClBlackListFile.c_str(), File)) { - report_fatal_error("Can't open blacklist file " + ClBlackListFile + ": " + - EC.message()); - } - MemoryBuffer *Buff = File.take(); - const char *Data = Buff->getBufferStart(); - size_t DataLen = Buff->getBufferSize(); - SmallVector<StringRef, 16> Lines; - SplitString(StringRef(Data, DataLen), Lines, "\n\r"); - for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { - if (Lines[i].startswith(kFunPrefix)) { - std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); - std::string ThisFuncRE; - // add ThisFunc replacing * with .* - for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { - if (ThisFunc[j] == '*') - ThisFuncRE += '.'; - ThisFuncRE += ThisFunc[j]; - } - // Check that the regexp is valid. - Regex CheckRE(ThisFuncRE); - std::string Error; - if (!CheckRE.isValid(Error)) - report_fatal_error("malformed blacklist regex: " + ThisFunc + - ": " + Error); - // Append to the final regexp. - if (Fun.size()) - Fun += "|"; - Fun += ThisFuncRE; - } - } - if (Fun.size()) { - Functions = new Regex(Fun); - } -} - -bool BlackList::isIn(const Function &F) { - if (Functions) { - bool Res = Functions->match(F.getName()); - return Res; - } - return false; -} diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index f8dbca3..e4c8cf1 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMInstrumentation AddressSanitizer.cpp EdgeProfiling.cpp + FunctionBlackList.cpp GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp new file mode 100644 index 0000000..188ea4d --- /dev/null +++ b/lib/Transforms/Instrumentation/FunctionBlackList.cpp @@ -0,0 +1,79 @@ +//===-- FunctionBlackList.cpp - blacklist of functions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions based on +// user-supplied blacklist. 
+// +//===----------------------------------------------------------------------===// + +#include "FunctionBlackList.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + +namespace llvm { + +FunctionBlackList::FunctionBlackList(const std::string &Path) { + Functions = NULL; + const char *kFunPrefix = "fun:"; + if (!Path.size()) return; + std::string Fun; + + OwningPtr<MemoryBuffer> File; + if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) { + report_fatal_error("Can't open blacklist file " + Path + ": " + + EC.message()); + } + MemoryBuffer *Buff = File.take(); + const char *Data = Buff->getBufferStart(); + size_t DataLen = Buff->getBufferSize(); + SmallVector<StringRef, 16> Lines; + SplitString(StringRef(Data, DataLen), Lines, "\n\r"); + for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) { + if (Lines[i].startswith(kFunPrefix)) { + std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix)); + std::string ThisFuncRE; + // add ThisFunc replacing * with .* + for (size_t j = 0, n = ThisFunc.size(); j < n; j++) { + if (ThisFunc[j] == '*') + ThisFuncRE += '.'; + ThisFuncRE += ThisFunc[j]; + } + // Check that the regexp is valid. + Regex CheckRE(ThisFuncRE); + std::string Error; + if (!CheckRE.isValid(Error)) + report_fatal_error("malformed blacklist regex: " + ThisFunc + + ": " + Error); + // Append to the final regexp. + if (Fun.size()) + Fun += "|"; + Fun += ThisFuncRE; + } + } + if (Fun.size()) { + Functions = new Regex(Fun); + } +} + +bool FunctionBlackList::isIn(const Function &F) { + if (Functions) { + bool Res = Functions->match(F.getName()); + return Res; + } + return false; +} + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h new file mode 100644 index 0000000..c1239b9 --- /dev/null +++ b/lib/Transforms/Instrumentation/FunctionBlackList.h @@ -0,0 +1,37 @@ +//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// This is a utility class for instrumentation passes (like AddressSanitizer +// or ThreadSanitizer) to avoid instrumenting some functions based on +// user-supplied blacklist. +// +//===----------------------------------------------------------------------===// +// + +#include <string> + +namespace llvm { +class Function; +class Regex; + +// Blacklisted functions are not instrumented. +// The blacklist file contains one or more lines like this: +// --- +// fun:FunctionWildCard +// --- +// This is similar to the "ignore" feature of ThreadSanitizer. 
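The new FunctionBlackList above turns each "fun:" wildcard into a regex fragment by inserting a '.' before every '*' and then joins the fragments with '|'; a standalone sketch of just that string transformation, with file parsing and llvm::Regex validation omitted and the names made up:

#include <iostream>
#include <string>
#include <vector>

// Convert patterns such as "my_*_helper" into one alternation like
// "my_.*_helper|do_not_instrument", mirroring the constructor loop above.
static std::string wildcardsToRegex(const std::vector<std::string> &Funcs) {
  std::string Joined;
  for (size_t i = 0; i != Funcs.size(); ++i) {
    std::string RE;
    for (size_t j = 0; j != Funcs[i].size(); ++j) {
      if (Funcs[i][j] == '*')
        RE += '.';        // '*' becomes ".*"
      RE += Funcs[i][j];
    }
    if (!Joined.empty())
      Joined += '|';
    Joined += RE;
  }
  return Joined;
}

int main() {
  std::vector<std::string> Funcs;
  Funcs.push_back("my_*_helper");
  Funcs.push_back("do_not_instrument");
  std::cout << wildcardsToRegex(Funcs) << "\n";
  return 0;
}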
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores +class FunctionBlackList { + public: + FunctionBlackList(const std::string &Path); + bool isIn(const Function &F); + private: + Regex *Functions; +}; + +} // namespace llvm diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index d822535..85fda30 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -21,12 +21,14 @@ #define DEBUG_TYPE "tsan" +#include "FunctionBlackList.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Intrinsics.h" #include "llvm/Function.h" #include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" @@ -37,6 +39,9 @@ using namespace llvm; +static cl::opt<std::string> ClBlackListFile("tsan-blacklist", + cl::desc("Blacklist file"), cl::Hidden); + namespace { /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { @@ -48,6 +53,7 @@ struct ThreadSanitizer : public FunctionPass { private: TargetData *TD; + OwningPtr<FunctionBlackList> BL; // Callbacks to run-time library are computed in doInitialization. Value *TsanFuncEntry; Value *TsanFuncExit; @@ -76,6 +82,8 @@ bool ThreadSanitizer::doInitialization(Module &M) { TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; + BL.reset(new FunctionBlackList(ClBlackListFile)); + // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); Value *TsanInit = M.getOrInsertFunction("__tsan_init", @@ -102,6 +110,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; + if (BL->isIn(F)) return false; SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> LoadsAndStores; bool Res = false; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index aad3a92..020ec57 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -579,6 +579,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return true; } + if (II && TLI) { + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy)) + while (!PtrOps.empty()) + if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy)) + return true; + } + // From here on out we're working with named functions. 
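The sanitizer hunks above consume the helper the same way: construct it once from a command-line path and ask isIn(F) per function. A minimal usage sketch under those assumptions; the struct and its members here are illustrative, not the actual passes:

#include "FunctionBlackList.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Function.h"
#include <string>
using namespace llvm;

namespace {
struct MyInstrumenter {
  OwningPtr<FunctionBlackList> BL;

  // An empty path produces a blacklist that matches nothing.
  void init(const std::string &BlackListPath) {
    BL.reset(new FunctionBlackList(BlackListPath));
  }

  bool shouldInstrument(const Function &F) {
    return !BL->isIn(F);  // skip blacklisted functions entirely
  }
};
}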
if (CI->getCalledFunction() == 0) return false; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index e275268..9b0aadb 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -28,6 +28,7 @@ STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); STATISTIC(NumCmps, "Number of comparisons propagated"); +STATISTIC(NumDeadCases, "Number of switch cases removed"); namespace { class CorrelatedValuePropagation : public FunctionPass { @@ -37,6 +38,7 @@ namespace { bool processPHI(PHINode *P); bool processMemAccess(Instruction *I); bool processCmp(CmpInst *C); + bool processSwitch(SwitchInst *SI); public: static char ID; @@ -110,7 +112,8 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Changed = true; } - ++NumPhis; + if (Changed) + ++NumPhis; return Changed; } @@ -173,6 +176,86 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) { return true; } +/// processSwitch - Simplify a switch instruction by removing cases which can +/// never fire. If the uselessness of a case could be determined locally then +/// constant propagation would already have figured it out. Instead, walk the +/// predecessors and statically evaluate cases based on information available +/// on that edge. Cases that cannot fire no matter what the incoming edge can +/// safely be removed. If a case fires on every incoming edge then the entire +/// switch can be removed and replaced with a branch to the case destination. +bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { + Value *Cond = SI->getCondition(); + BasicBlock *BB = SI->getParent(); + + // If the condition was defined in same block as the switch then LazyValueInfo + // currently won't say anything useful about it, though in theory it could. + if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB) + return false; + + // If the switch is unreachable then trying to improve it is a waste of time. + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); + if (PB == PE) return false; + + // Analyse each switch case in turn. This is done in reverse order so that + // removing a case doesn't cause trouble for the iteration. + bool Changed = false; + for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE; + ) { + ConstantInt *Case = CI.getCaseValue(); + + // Check to see if the switch condition is equal to/not equal to the case + // value on every incoming edge, equal/not equal being the same each time. + LazyValueInfo::Tristate State = LazyValueInfo::Unknown; + for (pred_iterator PI = PB; PI != PE; ++PI) { + // Is the switch condition equal to the case value? + LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ, + Cond, Case, *PI, BB); + // Give up on this case if nothing is known. + if (Value == LazyValueInfo::Unknown) { + State = LazyValueInfo::Unknown; + break; + } + + // If this was the first edge to be visited, record that all other edges + // need to give the same result. + if (PI == PB) { + State = Value; + continue; + } + + // If this case is known to fire for some edges and known not to fire for + // others then there is nothing we can do - give up. + if (Value != State) { + State = LazyValueInfo::Unknown; + break; + } + } + + if (State == LazyValueInfo::False) { + // This case never fires - remove it. 
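A source-level illustration, as a made-up example, of what the new CorrelatedValuePropagation::processSwitch above can prove: on the lone edge into the switch, LazyValueInfo knows the condition is positive, so some cases can never fire.

// With x > 0 on the only path into the switch, 'case -1' and 'case 0' are
// removable; if every incoming edge instead agreed that one case always
// fires, the whole switch would be replaced by a branch to that destination.
int classify(int x) {
  if (x > 0) {
    switch (x) {
    case -1: return 100;  // provably dead on this edge
    case 0:  return 200;  // provably dead on this edge
    case 1:  return 1;
    default: return 2;
    }
  }
  return 0;
}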
+ CI.getCaseSuccessor()->removePredecessor(BB); + SI->removeCase(CI); // Does not invalidate the iterator. + ++NumDeadCases; + Changed = true; + } else if (State == LazyValueInfo::True) { + // This case always fires. Arrange for the switch to be turned into an + // unconditional branch by replacing the switch condition with the case + // value. + SI->setCondition(Case); + NumDeadCases += SI->getNumCases(); + Changed = true; + break; + } + } + + if (Changed) + // If the switch has been simplified to the point where it can be replaced + // by a branch then do so now. + ConstantFoldTerminator(BB); + + return Changed; +} + bool CorrelatedValuePropagation::runOnFunction(Function &F) { LVI = &getAnalysis<LazyValueInfo>(); @@ -200,6 +283,13 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { } } + Instruction *Term = FI->getTerminator(); + switch (Term->getOpcode()) { + case Instruction::Switch: + BBChanged |= processSwitch(cast<SwitchInst>(Term)); + break; + } + FnChanged |= BBChanged; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index fe05e35..ac80c48 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -2158,10 +2158,11 @@ bool GVN::processInstruction(Instruction *I) { Value *SwitchCond = SI->getCondition(); BasicBlock *Parent = SI->getParent(); bool Changed = false; - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) { - BasicBlock *Dst = SI->getCaseSuccessor(i); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + BasicBlock *Dst = i.getCaseSuccessor(); if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) - Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst); + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst); } return Changed; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index d1e57e1..490617a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -450,8 +450,10 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { } // Add a new IVUsers entry for the newly-created integer PHI. - if (IU) - IU->AddUsersIfInteresting(NewPHI); + if (IU) { + SmallPtrSet<Loop*, 16> SimplifiedLoopNests; + IU->AddUsersIfInteresting(NewPHI, SimplifiedLoopNests); + } Changed = true; } @@ -1967,8 +1969,11 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop exit test instruction. if (IU && NewICmp) { ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp); - if (NewICmpInst) - IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0))); + if (NewICmpInst) { + SmallPtrSet<Loop*, 16> SimplifiedLoopNests; + IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)), + SimplifiedLoopNests); + } } // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index fa25a8f..429b61b 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -857,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (BBIt != LoadBB->begin()) return false; + // If all of the loads and stores that feed the value have the same TBAA tag, + // then we can propagate it onto any newly inserted loads. 
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa); SmallPtrSet<BasicBlock*, 8> PredsScanned; typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy; @@ -875,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); - Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6); + MDNode *ThisTBAATag = 0; + Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, + 0, &ThisTBAATag); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } + + // If tbaa tags disagree or are not present, forget about them. + if (TBAATag != ThisTBAATag) TBAATag = 0; // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -939,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LI->getAlignment(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); + if (TBAATag) + NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag); + AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } @@ -1087,8 +1098,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero()); else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) { - unsigned ValCase = SI->findCaseValue(cast<ConstantInt>(Val)); - DestBB = SI->getSuccessor(SI->resolveSuccessorIndex(ValCase)); + DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor(); } else { assert(isa<IndirectBrInst>(BB->getTerminator()) && "Unexpected terminator"); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6768860..82d918e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4534,22 +4534,25 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) if (!L->isLoopSimplifyForm()) return; + // If there's no interesting work to be done, bail early. + if (IU.empty()) return; + +#ifndef NDEBUG // All dominating loops must have preheaders, or SCEVExpander may not be able // to materialize an AddRecExpr whose Start is an outer AddRecExpr. // - // FIXME: This is a little absurd. I think LoopSimplify should be taught - // to create a preheader under any circumstance. + // IVUsers analysis should only create users that are dominated by simple loop + // headers. Since this loop should dominate all of its users, its user list + // should be empty if this loop itself is not within a simple loop nest. for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader()); Rung; Rung = Rung->getIDom()) { BasicBlock *BB = Rung->getBlock(); const Loop *DomLoop = LI.getLoopFor(BB); if (DomLoop && DomLoop->getHeader() == BB) { - if (!DomLoop->getLoopPreheader()) - return; + assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest"); } } - // If there's no interesting work to be done, bail early. 
- if (IU.empty()) return; +#endif // DEBUG DEBUG(dbgs() << "\nLSR on loop "; WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 2c75f63..053eb0c 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -32,7 +32,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" -#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -445,8 +445,9 @@ bool LoopUnswitch::processCurrentLoop() { // Do not process same value again and again. // At this point we have some cases already unswitched and // some not yet unswitched. Let's find the first not yet unswitched one. - for (unsigned i = 0; i < NumCases; ++i) { - Constant* UnswitchValCandidate = SI->getCaseValue(i); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + Constant* UnswitchValCandidate = i.getCaseValue(); if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { UnswitchVal = UnswitchValCandidate; break; @@ -574,12 +575,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // this. // Note that we can't trivially unswitch on the default case or // on already unswitched cases. - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { BasicBlock* LoopExitCandidate; if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, - SI->getCaseSuccessor(i)))) { + i.getCaseSuccessor()))) { // Okay, we found a trivial case, remember the value that is trivial. - ConstantInt* CaseVal = SI->getCaseValue(i); + ConstantInt* CaseVal = i.getCaseValue(); // Check that it was not unswitched before, since already unswitched // trivial vals are looks trivial too. @@ -1117,16 +1119,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, SwitchInst *SI = dyn_cast<SwitchInst>(U); if (SI == 0 || !isa<ConstantInt>(Val)) continue; - unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); + SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); // Default case is live for multiple values. - if (DeadCase == SwitchInst::ErrorIndex) continue; + if (DeadCase == SI->case_default()) continue; // Found a dead case value. Don't remove PHI nodes in the // successor if they become single-entry, those PHI nodes may // be in the Users list. BasicBlock *Switch = SI->getParent(); - BasicBlock *SISucc = SI->getCaseSuccessor(DeadCase); + BasicBlock *SISucc = DeadCase.getCaseSuccessor(); BasicBlock *Latch = L->getLoopLatch(); BranchesInfo.setUnswitched(SI, Val); @@ -1146,7 +1148,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // Compute the successors instead of relying on the return value // of SplitEdge, since it may have split the switch successor // after PHI nodes. - BasicBlock *NewSISucc = SI->getCaseSuccessor(DeadCase); + BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); BasicBlock *OldSISucc = *succ_begin(NewSISucc); // Create an "unreachable" destination. 
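The LoopUnswitch hunk above, and later ones in SCCP, SimplifyCFG and LowerExpectIntrinsic, rely on the new miss convention: findCaseValue returns a CaseIt, and a value with no explicit case compares equal to case_default() rather than yielding the old ErrorIndex. A small sketch of that lookup, with the helper name being illustrative:

#include "llvm/Instructions.h"
using namespace llvm;

// Resolve the block a switch transfers to for a known constant condition.
static BasicBlock *destinationFor(SwitchInst *SI, ConstantInt *Val) {
  SwitchInst::CaseIt It = SI->findCaseValue(Val);
  if (It == SI->case_default())
    return SI->getDefaultDest();   // no explicit case covers Val
  return It.getCaseSuccessor();    // target of the matching case
}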
BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 1c7f036..9fdea8d 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -2929,11 +2929,17 @@ ComputePostOrders(Function &F, Visited.clear(); // Compute the exits, which are the starting points for reverse-CFG DFS. + // This includes blocks where all the successors are backedges that + // we're skipping. SmallVector<BasicBlock *, 4> Exits; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock *BB = I; - if (cast<TerminatorInst>(&BB->back())->getNumSuccessors() == 0) - Exits.push_back(BB); + TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); + for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI) + if (!Backedges.count(std::make_pair(BB, *SI))) + goto HasNonBackedgeSucc; + Exits.push_back(BB); + HasNonBackedgeSucc:; } // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. @@ -3035,7 +3041,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, // but our releases will never depend on it, because they must be // paired with retains from before the invoke. InsertPts[0] = II->getNormalDest()->getFirstInsertionPt(); - InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); + if (!II->getMetadata(NoObjCARCExceptionsMDKind)) + InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); } else { // Insert code immediately after the last use. InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); @@ -4017,36 +4024,40 @@ bool ObjCARCContract::runOnFunction(Function &F) { Use &U = UI.getUse(); unsigned OperandNo = UI.getOperandNo(); ++UI; // Increment UI now, because we may unlink its element. - if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser())) - if (Inst != UserInst && DT->dominates(Inst, UserInst)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = - PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = - PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - for (unsigned i = 0, e = PHI->getNumIncomingValues(); - i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); - U.set(Replacement); - } + Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); + if (!UserInst) + continue; + // FIXME: dominates should return true for unreachable UserInst. + if (!DT->isReachableFromEntry(UserInst->getParent()) || + DT->dominates(Inst, UserInst)) { + Changed = true; + Instruction *Replacement = Inst; + Type *UseTy = U.get()->getType(); + if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { + // For PHI nodes, insert the bitcast in the predecessor block. 
+ unsigned ValNo = + PHINode::getIncomingValueNumForOperand(OperandNo); + BasicBlock *BB = + PHI->getIncomingBlock(ValNo); + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", + &BB->back()); + for (unsigned i = 0, e = PHI->getNumIncomingValues(); + i != e; ++i) + if (PHI->getIncomingBlock(i) == BB) { + // Keep the UI iterator valid. + if (&PHI->getOperandUse( + PHINode::getOperandNumForIncomingValue(i)) == + &UI.getUse()) + ++UI; + PHI->setIncomingValue(i, Replacement); + } + } else { + if (Replacement->getType() != UseTy) + Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); + U.set(Replacement); } + } } // If Arg is a no-op casted pointer, strip one level of casts and diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4274b50..5ce82b9 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -564,7 +564,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, return; } - Succs[SI->resolveSuccessorIndex(SI->findCaseValue(CI))] = true; + Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true; return; } @@ -623,14 +623,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { if (CI == 0) return !SCValue.isUndefined(); - // Make sure to skip the "default value" which isn't a value - for (unsigned i = 0, E = SI->getNumCases(); i != E; ++i) - if (SI->getCaseValue(i) == CI) // Found the taken branch. - return SI->getCaseSuccessor(i) == To; - - // If the constant value is not equal to any of the branches, we must - // execute default branch. - return SI->getDefaultDest() == To; + return SI->findCaseValue(CI).getCaseSuccessor() == To; } // Just mark all destinations executable! @@ -1495,12 +1488,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // If the input to SCCP is actually switch on undef, fix the undef to // the first constant. if (isa<UndefValue>(SI->getCondition())) { - SI->setCondition(SI->getCaseValue(0)); - markEdgeExecutable(BB, SI->getCaseSuccessor(0)); + SI->setCondition(SI->case_begin().getCaseValue()); + markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor()); return true; } - markForcedConstant(SI->getCondition(), SI->getCaseValue(0)); + markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue()); return true; } } diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index d23263f..d36a18f 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -574,8 +574,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, // transform it into a store of the expanded constant value. if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); - unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue(); - if (NumBytes != 0) { + signed SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue(); + if (SNumBytes > 0) { + unsigned NumBytes = static_cast<unsigned>(SNumBytes); unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); // Compute the value replicated the right number of times. 
@@ -1517,6 +1518,9 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); if (Length == 0) return MarkUnsafe(Info, User); + if (Length->isNegative()) + return MarkUnsafe(Info, User); + isSafeMemAccess(Offset, Length->getZExtValue(), 0, UI.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp deleted file mode 100644 index 50c91b6..0000000 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ /dev/null @@ -1,182 +0,0 @@ -//===- BasicInliner.cpp - Basic function level inliner --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a simple function based inliner that does not use -// call graph information. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "basicinliner" -#include "llvm/Module.h" -#include "llvm/Function.h" -#include "llvm/Transforms/Utils/BasicInliner.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" -#include <vector> - -using namespace llvm; - -static cl::opt<unsigned> -BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200), - cl::desc("Control the amount of basic inlining to perform (default = 200)")); - -namespace llvm { - - /// BasicInlinerImpl - BasicInliner implemantation class. This hides - /// container info, used by basic inliner, from public interface. - struct BasicInlinerImpl { - - BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT - void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT - public: - BasicInlinerImpl(TargetData *T) : TD(T) {} - - /// addFunction - Add function into the list of functions to process. - /// All functions must be inserted using this interface before invoking - /// inlineFunctions(). - void addFunction(Function *F) { - Functions.push_back(F); - } - - /// neverInlineFunction - Sometimes a function is never to be inlined - /// because of one or other reason. - void neverInlineFunction(Function *F) { - NeverInline.insert(F); - } - - /// inlineFuctions - Walk all call sites in all functions supplied by - /// client. Inline as many call sites as possible. Delete completely - /// inlined functions. - void inlineFunctions(); - - private: - TargetData *TD; - std::vector<Function *> Functions; - SmallPtrSet<const Function *, 16> NeverInline; - SmallPtrSet<Function *, 8> DeadFunctions; - InlineCostAnalyzer CA; - }; - -/// inlineFuctions - Walk all call sites in all functions supplied by -/// client. Inline as many call sites as possible. Delete completely -/// inlined functions. -void BasicInlinerImpl::inlineFunctions() { - - // Scan through and identify all call sites ahead of time so that we only - // inline call sites in the original functions, not call sites that result - // from inlining other functions. 
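The two ScalarReplAggregates hunks above read the memset length as a signed quantity so that a malformed negative constant is rejected instead of being treated as an enormous unsigned byte count; a standalone illustration with plain integers standing in for ConstantInt:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t BogusLength = -1;
  uint64_t AsUnsigned = (uint64_t)(uint32_t)BogusLength;  // 4294967295 bytes
  int64_t AsSigned = (int64_t)BogusLength;                // -1, easy to reject
  std::printf("zero-extended: %llu  sign-extended: %lld\n",
              (unsigned long long)AsUnsigned, (long long)AsSigned);
  return 0;
}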
- std::vector<CallSite> CallSites; - - for (std::vector<Function *>::iterator FI = Functions.begin(), - FE = Functions.end(); FI != FE; ++FI) { - Function *F = *FI; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - CallSite CS(cast<Value>(I)); - if (CS && CS.getCalledFunction() - && !CS.getCalledFunction()->isDeclaration()) - CallSites.push_back(CS); - } - } - - DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); - - // Inline call sites. - bool Changed = false; - do { - Changed = false; - for (unsigned index = 0; index != CallSites.size() && !CallSites.empty(); - ++index) { - CallSite CS = CallSites[index]; - if (Function *Callee = CS.getCalledFunction()) { - - // Eliminate calls that are never inlinable. - if (Callee->isDeclaration() || - CS.getInstruction()->getParent()->getParent() == Callee) { - CallSites.erase(CallSites.begin() + index); - --index; - continue; - } - InlineCost IC = CA.getInlineCost(CS, NeverInline); - if (IC.isAlways()) { - DEBUG(dbgs() << " Inlining: cost=always" - <<", call: " << *CS.getInstruction()); - } else if (IC.isNever()) { - DEBUG(dbgs() << " NOT Inlining: cost=never" - <<", call: " << *CS.getInstruction()); - continue; - } else { - int Cost = IC.getValue(); - - if (Cost >= (int) BasicInlineThreshold) { - DEBUG(dbgs() << " NOT Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction()); - continue; - } else { - DEBUG(dbgs() << " Inlining: cost = " << Cost - << ", call: " << *CS.getInstruction()); - } - } - - // Inline - InlineFunctionInfo IFI(0, TD); - if (InlineFunction(CS, IFI)) { - Callee->removeDeadConstantUsers(); - if (Callee->isDefTriviallyDead()) - DeadFunctions.insert(Callee); - Changed = true; - CallSites.erase(CallSites.begin() + index); - --index; - } - } - } - } while (Changed); - - // Remove completely inlined functions from module. - for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(), - E = DeadFunctions.end(); I != E; ++I) { - Function *D = *I; - Module *M = D->getParent(); - M->getFunctionList().remove(D); - } -} - -BasicInliner::BasicInliner(TargetData *TD) { - Impl = new BasicInlinerImpl(TD); -} - -BasicInliner::~BasicInliner() { - delete Impl; -} - -/// addFunction - Add function into the list of functions to process. -/// All functions must be inserted using this interface before invoking -/// inlineFunctions(). -void BasicInliner::addFunction(Function *F) { - Impl->addFunction(F); -} - -/// neverInlineFunction - Sometimes a function is never to be inlined because -/// of one or other reason. -void BasicInliner::neverInlineFunction(Function *F) { - Impl->neverInlineFunction(F); -} - -/// inlineFuctions - Walk all call sites in all functions supplied by -/// client. Inline as many call sites as possible. Delete completely -/// inlined functions. 
-void BasicInliner::inlineFunctions() { - Impl->inlineFunctions(); -} - -} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index d1aa599..7f5cb5e 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -1,7 +1,6 @@ add_llvm_library(LLVMTransformUtils AddrModeMatcher.cpp BasicBlockUtils.cpp - BasicInliner.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp CloneFunction.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 04ef7d7..1b28c35 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -313,8 +313,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, Cond = dyn_cast_or_null<ConstantInt>(V); } if (Cond) { // Constant fold to uncond branch! - unsigned CaseIndex = SI->findCaseValue(Cond); - BasicBlock *Dest = SI->getSuccessor(SI->resolveSuccessorIndex(CaseIndex)); + SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond); + BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor()); VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; @@ -424,10 +424,6 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, // BasicBlock::iterator I = NewBB->begin(); - DebugLoc TheCallDL; - if (TheCall) - TheCallDL = TheCall->getDebugLoc(); - // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 429919b..e8c0b80 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -617,7 +617,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // of the other successors. TheSwitch->setCondition(call); TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); - TheSwitch->removeCase(NumExitBlocks-1); // Remove redundant case + // Remove redundant case + TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); break; } } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 336d8f6..5f895eb 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -106,31 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { // If we are switching on a constant, we can convert the switch into a // single branch instruction! ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); - BasicBlock *TheOnlyDest = SI->getDefaultDest(); // The default dest + BasicBlock *TheOnlyDest = SI->getDefaultDest(); BasicBlock *DefaultDest = TheOnlyDest; // Figure out which case it goes to. - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) { + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { // Found case matching a constant operand? - if (SI->getCaseValue(i) == CI) { - TheOnlyDest = SI->getCaseSuccessor(i); + if (i.getCaseValue() == CI) { + TheOnlyDest = i.getCaseSuccessor(); break; } // Check to see if this branch is going to the same place as the default // dest. If so, eliminate it as an explicit compare. - if (SI->getCaseSuccessor(i) == DefaultDest) { + if (i.getCaseSuccessor() == DefaultDest) { // Remove this entry. DefaultDest->removePredecessor(SI->getParent()); SI->removeCase(i); - --i; --e; // Don't skip an entry... 
+ --i; --e; continue; } // Otherwise, check to see if the switch only branches to one destination. // We do this by reseting "TheOnlyDest" to null when we find two non-equal // destinations. - if (SI->getCaseSuccessor(i) != TheOnlyDest) TheOnlyDest = 0; + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; } if (CI && !TheOnlyDest) { @@ -167,11 +168,13 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { if (SI->getNumCases() == 1) { // Otherwise, we can fold this switch into a conditional branch // instruction if it has only one non-default destination. + SwitchInst::CaseIt FirstCase = SI->case_begin(); Value *Cond = Builder.CreateICmpEQ(SI->getCondition(), - SI->getCaseValue(0), "cond"); + FirstCase.getCaseValue(), "cond"); // Insert the new branch. - Builder.CreateCondBr(Cond, SI->getCaseSuccessor(0), SI->getDefaultDest()); + Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(), + SI->getDefaultDest()); // Delete the old switch. SI->eraseFromParent(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 4376265..0bc185d 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -386,7 +386,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { this); } else { SmallVector<BasicBlock*, 2> NewBBs; - SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", + SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader", ".split-lp", this, NewBBs); PreheaderBB = NewBBs[0]; } @@ -538,8 +538,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB, /// Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, BasicBlock *Preheader) { - // Don't try to separate loops without a preheader (this excludes - // loop headers which are targeted by an indirectbr). + // Don't try to separate loops without a preheader. if (!Preheader) return 0; @@ -554,11 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM, // handles the case when a PHI node has multiple instances of itself as // arguments. SmallVector<BasicBlock*, 8> OuterLoopPreds; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. + if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator())) + return 0; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); - + } + } DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); // If ScalarEvolution is around and knows anything about values in diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index df8d68e..c70ced1 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -73,16 +73,16 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) { LLVMContext &Context = CI->getContext(); Type *Int32Ty = Type::getInt32Ty(Context); - unsigned caseNo = SI->findCaseValue(ExpectedValue); + SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue); std::vector<Value *> Vec; unsigned n = SI->getNumCases(); Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case Vec[0] = MDString::get(Context, "branch_weights"); - Vec[1] = ConstantInt::get(Int32Ty, SwitchInst::ErrorIndex == caseNo ? + Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ? 
LikelyBranchWeight : UnlikelyBranchWeight); for (unsigned i = 0; i < n; ++i) { - Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == caseNo ? + Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ? LikelyBranchWeight : UnlikelyBranchWeight); } diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 424f564..a16130d 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) { unsigned numCmps = 0; // Start with "simple" cases - for (unsigned i = 0; i < SI->getNumCases(); ++i) - Cases.push_back(CaseRange(SI->getCaseValue(i), - SI->getCaseValue(i), - SI->getCaseSuccessor(i))); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(), + i.getCaseSuccessor())); + std::sort(Cases.begin(), Cases.end(), CaseCmp()); // Merge case into clusters diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a9853a4..d53a46e 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -480,9 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI, BasicBlock*> > &Cases) { if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Cases.reserve(SI->getNumCases()); - for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) - Cases.push_back(std::make_pair(SI->getCaseValue(i), - SI->getCaseSuccessor(i))); + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) + Cases.push_back(std::make_pair(i.getCaseValue(), + i.getCaseSuccessor())); return SI->getDefaultDest(); } @@ -605,10 +605,10 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); - for (unsigned i = SI->getNumCases(); i != 0;) { + for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; - if (DeadCases.count(SI->getCaseValue(i))) { - SI->getCaseSuccessor(i)->removePredecessor(TI->getParent()); + if (DeadCases.count(i.getCaseValue())) { + i.getCaseSuccessor()->removePredecessor(TI->getParent()); SI->removeCase(i); } } @@ -2009,10 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // Find the relevant condition and destinations. Value *Condition = Select->getCondition(); - unsigned TrueCase = SI->findCaseValue(TrueVal); - unsigned FalseCase = SI->findCaseValue(FalseVal); - BasicBlock *TrueBB = SI->getSuccessor(SI->resolveSuccessorIndex(TrueCase)); - BasicBlock *FalseBB = SI->getSuccessor(SI->resolveSuccessorIndex(FalseCase)); + BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor(); + BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor(); // Perform the actual simplification. return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); @@ -2096,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. 
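The SimplifyCFG hunk above, like the CorrelatedValuePropagation one earlier, walks cases backwards while deleting: iterating from case_end() toward case_begin() lets removeCase() run without disturbing the positions still to be visited, as the in-tree comments note. A sketch of the idiom, where the dead-value predicate is a placeholder:

#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
using namespace llvm;

static void removeMatchingCases(SwitchInst *SI, bool (*IsDead)(ConstantInt *)) {
  for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
    --i;
    if (IsDead(i.getCaseValue())) {
      // Keep PHI nodes in the successor consistent before dropping the edge.
      i.getCaseSuccessor()->removePredecessor(SI->getParent());
      SI->removeCase(i);
    }
  }
}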
-  if (SI->findCaseValue(Cst) != SwitchInst::ErrorIndex) {
+  if (SI->findCaseValue(Cst) != SI->case_default()) {
     Value *V;
     if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
       V = ConstantInt::getFalse(BB->getContext());
@@ -2423,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
         }
       }
     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
-      for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-        if (SI->getCaseSuccessor(i) == BB) {
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+           i != e; ++i)
+        if (i.getCaseSuccessor() == BB) {
           BB->removePredecessor(SI->getParent());
           SI->removeCase(i);
           --i; --e;
@@ -2434,12 +2433,13 @@
       // destination and make it the default.
       if (SI->getDefaultDest() == BB) {
         std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
-        for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i) {
+        for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+             i != e; ++i) {
           std::pair<unsigned, unsigned> &entry =
-              Popularity[SI->getCaseSuccessor(i)];
+              Popularity[i.getCaseSuccessor()];
           if (entry.first == 0) {
             entry.first = 1;
-            entry.second = i;
+            entry.second = i.getCaseIndex();
           } else {
             entry.first++;
           }
@@ -2470,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
       for (unsigned i = 0; i != MaxPop-1; ++i)
        MaxBlock->removePredecessor(SI->getParent());
 
-      for (unsigned i = 0, e = SI->getNumCases(); i != e; ++i)
-        if (SI->getCaseSuccessor(i) == MaxBlock) {
+      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+           i != e; ++i)
+        if (i.getCaseSuccessor() == MaxBlock) {
          SI->removeCase(i);
          --i; --e;
        }
@@ -2517,11 +2518,13 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   // Make sure all cases point to the same destination and gather the values.
   SmallVector<ConstantInt *, 16> Cases;
-  Cases.push_back(SI->getCaseValue(0));
-  for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
-    if (SI->getCaseSuccessor(I-1) != SI->getCaseSuccessor(I))
+  SwitchInst::CaseIt I = SI->case_begin();
+  Cases.push_back(I.getCaseValue());
+  SwitchInst::CaseIt PrevI = I++;
+  for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+    if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
       return false;
-    Cases.push_back(SI->getCaseValue(I));
+    Cases.push_back(I.getCaseValue());
   }
   assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
@@ -2539,10 +2542,11 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
   if (!Offset->isNullValue())
     Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
   Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
-  Builder.CreateCondBr(Cmp, SI->getCaseSuccessor(0), SI->getDefaultDest());
+  Builder.CreateCondBr(
+      Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
 
   // Prune obsolete incoming values off the successor's PHI nodes.
-  for (BasicBlock::iterator BBI = SI->getCaseSuccessor(0)->begin();
+  for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
        isa<PHINode>(BBI); ++BBI) {
     for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
       cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
@@ -2562,22 +2566,22 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
   // Gather dead cases.
   SmallVector<ConstantInt*, 8> DeadCases;
-  for (unsigned I = 0, E = SI->getNumCases(); I != E; ++I) {
-    if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
-        (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
-      DeadCases.push_back(SI->getCaseValue(I));
+  for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+    if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+        (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+      DeadCases.push_back(I.getCaseValue());
       DEBUG(dbgs() << "SimplifyCFG: switch case '"
-                   << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+                   << I.getCaseValue() << "' is dead.\n");
     }
   }
 
   // Remove dead cases from the switch.
   for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
-    unsigned Case = SI->findCaseValue(DeadCases[I]);
-    assert(Case != SwitchInst::ErrorIndex &&
+    SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+    assert(Case != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming.");
     // Prune unused values from PHI nodes.
-    SI->getCaseSuccessor(Case)->removePredecessor(SI->getParent());
+    Case.getCaseSuccessor()->removePredecessor(SI->getParent());
     SI->removeCase(Case);
   }
@@ -2626,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
   ForwardingNodesMap ForwardingNodes;
 
-  for (unsigned I = 0; I < SI->getNumCases(); ++I) { // 0 is the default case.
-    ConstantInt *CaseValue = SI->getCaseValue(I);
-    BasicBlock *CaseDest = SI->getCaseSuccessor(I);
+  for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+    ConstantInt *CaseValue = I.getCaseValue();
+    BasicBlock *CaseDest = I.getCaseSuccessor();
 
     int PhiIndex;
     PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 20eef3c..e00565d 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -231,8 +231,10 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
   // Inform IVUsers about the new users.
   if (IU) {
-    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
-      IU->AddUsersIfInteresting(I);
+    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) {
+      SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
+      IU->AddUsersIfInteresting(I, SimplifiedLoopNests);
+    }
   }
 
   DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6874906..7b39efb 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1731,12 +1731,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     Out << ", ";
     writeOperand(SI.getDefaultDest(), true);
     Out << " [";
-    unsigned NumCases = SI.getNumCases();
-    for (unsigned i = 0; i < NumCases; ++i) {
+    for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+         i != e; ++i) {
       Out << "\n ";
-      writeOperand(SI.getCaseValue(i), true);
+      writeOperand(i.getCaseValue(), true);
       Out << ", ";
-      writeOperand(SI.getCaseSuccessor(i), true);
+      writeOperand(i.getCaseSuccessor(), true);
     }
     Out << "\n ]";
   } else if (isa<IndirectBrInst>(I)) {
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index a7277f6..8903a8f 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -16,6 +16,7 @@
 #define LLVM_CONSTANTSCONTEXT_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Operator.h"
@@ -656,48 +657,18 @@ private:
       return ConstantClassInfo::getTombstoneKey();
     }
     static unsigned getHashValue(const ConstantClass *CP) {
-      // This is adapted from SuperFastHash by Paul Hsieh.
-      unsigned Hash = TypeClassInfo::getHashValue(CP->getType());
-      for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I) {
-        unsigned Data = ConstantInfo::getHashValue(CP->getOperand(I));
-        Hash += Data & 0xFFFF;
-        unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
-        Hash = (Hash << 16) ^ Tmp;
-        Hash += Hash >> 11;
-      }
-
-      // Force "avalanching" of final 127 bits.
-      Hash ^= Hash << 3;
-      Hash += Hash >> 5;
-      Hash ^= Hash << 4;
-      Hash += Hash >> 17;
-      Hash ^= Hash << 25;
-      Hash += Hash >> 6;
-      return Hash;
+      SmallVector<Constant*, 8> CPOperands;
+      CPOperands.reserve(CP->getNumOperands());
+      for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+        CPOperands.push_back(CP->getOperand(I));
+      return getHashValue(LookupKey(CP->getType(), CPOperands));
     }
     static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
       return LHS == RHS;
     }
     static unsigned getHashValue(const LookupKey &Val) {
-      // This is adapted from SuperFastHash by Paul Hsieh.
-      unsigned Hash = TypeClassInfo::getHashValue(Val.first);
-      for (Operands::const_iterator
-           I = Val.second.begin(), E = Val.second.end(); I != E; ++I) {
-        unsigned Data = ConstantInfo::getHashValue(*I);
-        Hash += Data & 0xFFFF;
-        unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
-        Hash = (Hash << 16) ^ Tmp;
-        Hash += Hash >> 11;
-      }
-
-      // Force "avalanching" of final 127 bits.
-      Hash ^= Hash << 3;
-      Hash += Hash >> 5;
-      Hash ^= Hash << 4;
-      Hash += Hash >> 17;
-      Hash ^= Hash << 25;
-      Hash += Hash >> 6;
-      return Hash;
+      return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+                                                         Val.second.end()));
     }
     static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
       if (RHS == getEmptyKey() || RHS == getTombstoneKey())
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 11fd5b6..8db6ac9 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -3159,13 +3159,16 @@ void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
   // Initialize some new operands.
   assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
   NumOperands = OpNo+2;
-  setCaseValue(NewCaseIdx, OnVal);
-  setCaseSuccessor(NewCaseIdx, Dest);
+  CaseIt Case(this, NewCaseIdx);
+  Case.setValue(OnVal);
+  Case.setSuccessor(Dest);
 }
 
 /// removeCase - This method removes the specified case and its successor
 /// from the switch instruction.
-void SwitchInst::removeCase(unsigned idx) {
+void SwitchInst::removeCase(CaseIt i) {
+  unsigned idx = i.getCaseIndex();
+
   assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
 
   unsigned NumOps = getNumOperands();
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 0bc4f74..c6f3558 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -185,16 +185,7 @@ bool Type::isSizedDerivedType() const {
   if (!this->isStructTy())
     return false;
 
-  // Opaque structs have no size.
-  if (cast<StructType>(this)->isOpaque())
-    return false;
-
-  // Okay, our struct is sized if all of the elements are.
-  for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
-    if (!(*I)->isSized())
-      return false;
-
-  return true;
+  return cast<StructType>(this)->isSized();
 }
 
 //===----------------------------------------------------------------------===//
@@ -579,6 +570,26 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
   return llvm::StructType::create(Ctx, StructFields, Name);
 }
 
+bool StructType::isSized() const {
+  if ((getSubclassData() & SCDB_IsSized) != 0)
+    return true;
+  if (isOpaque())
+    return false;
+
+  // Okay, our struct is sized if all of the elements are, but if one of the
+  // elements is opaque, the struct isn't sized *yet*, but may become sized in
+  // the future, so just bail out without caching.
+  for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+    if (!(*I)->isSized())
+      return false;
+
+  // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+  // we find a sized type, as types can only move from opaque to sized, not the
+  // other way.
+  const_cast<StructType*>(this)->setSubclassData(
+    getSubclassData() | SCDB_IsSized);
+  return true;
+}
 
 StringRef StructType::getName() const {
   assert(!isLiteral() && "Literal structs never have names");
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 207c06d..41cc38c 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -317,20 +317,40 @@ void Value::replaceAllUsesWith(Value *New) {
     BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
 }
 
-Value *Value::stripPointerCasts() {
-  if (!getType()->isPointerTy())
-    return this;
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+  PSK_ZeroIndices,
+  PSK_InBoundsConstantIndices,
+  PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+  if (!V->getType()->isPointerTy())
+    return V;
 
   // Even though we don't look through PHI nodes, we could be called on an
   // instruction in an unreachable block, which may be on a cycle.
   SmallPtrSet<Value *, 4> Visited;
 
-  Value *V = this;
   Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!GEP->hasAllZeroIndices())
-        return V;
+      switch (StripKind) {
+      case PSK_ZeroIndices:
+        if (!GEP->hasAllZeroIndices())
+          return V;
+        break;
+      case PSK_InBoundsConstantIndices:
+        if (!GEP->hasAllConstantIndices())
+          return V;
+        // fallthrough
+      case PSK_InBounds:
+        if (!GEP->isInBounds())
+          return V;
+        break;
+      }
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
       V = cast<Operator>(V)->getOperand(0);
@@ -346,6 +366,19 @@ Value *Value::stripPointerCasts() {
   return V;
 }
+} // namespace
+
+Value *Value::stripPointerCasts() {
+  return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+  return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+  return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
 
 /// isDereferenceablePointer - Test if this value is always a pointer to
 /// allocated and suitably aligned memory for a simple load or store.
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index dcf86d2..5b9b2a5 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -813,11 +813,11 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
   // have the same type as the switched-on value.
   Type *SwitchTy = SI.getCondition()->getType();
   SmallPtrSet<ConstantInt*, 32> Constants;
-  for (unsigned i = 0, e = SI.getNumCases(); i != e; ++i) {
-    Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+  for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+    Assert1(i.getCaseValue()->getType() == SwitchTy,
             "Switch constants must all be same type as switch value!", &SI);
-    Assert2(Constants.insert(SI.getCaseValue(i)),
-            "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+    Assert2(Constants.insert(i.getCaseValue()),
+            "Duplicate integer as switch case", &SI, i.getCaseValue());
   }
 
   visitTerminatorInst(SI);
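
Editorial note (not part of the patch): the hunks above migrate every caller from the old index-based SwitchInst accessors (getCaseValue(i), getCaseSuccessor(i), the SwitchInst::ErrorIndex sentinel) to the new SwitchInst::CaseIt iterator. The fragment below is a minimal sketch of client code against the post-patch API, using only accessors that appear in this diff (case_begin, case_end, case_default, findCaseValue, getCaseValue, getCaseSuccessor, removeCase, getDefaultDest); the function names removeDeadCases/destinationFor and the DeadCases set are illustrative, not LLVM API, and the include paths assume the 3.1-era header layout touched by this commit.

// Illustrative sketch only -- not from the patch.
#include "llvm/Instructions.h"       // SwitchInst, ConstantInt
#include "llvm/BasicBlock.h"
#include "llvm/ADT/SmallPtrSet.h"

using namespace llvm;

// Drop every case whose value is in DeadCases.  Iterating backwards mirrors
// the SimplifyCFG hunk at @@ -605,10 +605,10 @@ above, so removeCase(i)
// never skips an unvisited case.
static void removeDeadCases(SwitchInst *SI,
                            const SmallPtrSet<ConstantInt*, 8> &DeadCases) {
  for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
    --i;
    if (DeadCases.count(i.getCaseValue())) {
      // Remove the edge from PHI nodes before dropping the case itself.
      i.getCaseSuccessor()->removePredecessor(SI->getParent());
      SI->removeCase(i);
    }
  }
}

// findCaseValue now returns an iterator; case_default() replaces the old
// SwitchInst::ErrorIndex sentinel for "value is not an explicit case".
static BasicBlock *destinationFor(SwitchInst *SI, ConstantInt *CI) {
  SwitchInst::CaseIt It = SI->findCaseValue(CI);
  if (It == SI->case_default())
    return SI->getDefaultDest();
  return It.getCaseSuccessor();
}

Nothing in this sketch changes the patch's behavior; it only restates the new iteration idiom in one self-contained place.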