author | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700
committer | Shih-wei Liao <sliao@google.com> | 2012-04-24 11:26:46 -0700
commit | cf5a1461acaace0f3e7d11fbbcfbf635b8c8ea9d (patch)
tree | 557137810ae9efc96147d672d372e4dabd0a2440 /lib
parent | 4c8fab82874a29dcd2b242533af3ebe7f66bfd74 (diff)
parent | fc728fbdc2631ce8f343cf9b7292d218fde7419f (diff)
Merge with LLVM upstream r155090.
Conflicts:
lib/Support/Unix/PathV2.inc
Change-Id: I7b89833849f6cbcfa958a33a971d0f7754c9cb2c
Diffstat (limited to 'lib')
389 files changed, 14072 insertions, 10824 deletions
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 6c93f78..316e7bc 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -50,6 +50,52 @@ bool llvm::callIsSmall(const Function *F) {
   return false;
 }
 
+bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
+  if (isa<PHINode>(I))
+    return true;
+
+  // If a GEP has all constant indices, it will probably be folded with
+  // a load/store.
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    return GEP->hasAllConstantIndices();
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+      // These intrinsics don't count as size.
+      return true;
+    }
+  }
+
+  if (const CastInst *CI = dyn_cast<CastInst>(I)) {
+    // Noop casts, including ptr <-> int, don't count.
+    if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+      return true;
+    // trunc to a native type is free (assuming the target has compare and
+    // shift-right of the same width).
+    if (TD && isa<TruncInst>(CI) &&
+        TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+      return true;
+    // Result of a cmp instruction is often extended (to be used by other
+    // cmp instructions, logical or return instructions). These are usually
+    // nop on most sane targets.
+    if (isa<CmpInst>(CI->getOperand(0)))
+      return true;
+  }
+
+  return false;
+}
+
 /// analyzeBasicBlock - Fill in the current structure with information gleaned
 /// from the specified block.
 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
@@ -58,27 +104,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
   unsigned NumInstsBeforeThisBB = NumInsts;
   for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
        II != E; ++II) {
-    if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+    if (isInstructionFree(II, TD))
+      continue;
 
     // Special handling for calls.
     if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
-      if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
-        switch (IntrinsicI->getIntrinsicID()) {
-        default: break;
-        case Intrinsic::dbg_declare:
-        case Intrinsic::dbg_value:
-        case Intrinsic::invariant_start:
-        case Intrinsic::invariant_end:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-        case Intrinsic::objectsize:
-        case Intrinsic::ptr_annotation:
-        case Intrinsic::var_annotation:
-          // These intrinsics don't count as size.
-          continue;
-        }
-      }
-
       ImmutableCallSite CS(cast<Instruction>(II));
 
       if (const Function *F = CS.getCalledFunction()) {
@@ -115,28 +145,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
     if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
       ++NumVectorInsts;
 
-    if (const CastInst *CI = dyn_cast<CastInst>(II)) {
-      // Noop casts, including ptr <-> int, don't count.
-      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
-          isa<PtrToIntInst>(CI))
-        continue;
-      // trunc to a native type is free (assuming the target has compare and
-      // shift-right of the same width).
-      if (isa<TruncInst>(CI) && TD &&
-          TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
-        continue;
-      // Result of a cmp instruction is often extended (to be used by other
-      // cmp instructions, logical or return instructions). These are usually
-      // nop on most sane targets.
-      if (isa<CmpInst>(CI->getOperand(0)))
-        continue;
-    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
-      // If a GEP has all constant indices, it will probably be folded with
-      // a load/store.
-      if (GEPI->hasAllConstantIndices())
-        continue;
-    }
-
     ++NumInsts;
   }
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index f0bdc48..85913b1 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 using namespace llvm;
 
@@ -385,16 +386,21 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
 /// createObjCProperty - Create debugging information entry for Objective-C
 /// property.
-DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+                                             DIFile File, unsigned LineNumber,
                                              StringRef GetterName,
                                              StringRef SetterName,
-                                             unsigned PropertyAttributes) {
+                                             unsigned PropertyAttributes,
+                                             DIType Ty) {
   Value *Elts[] = {
-    GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_Property),
+    GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
     MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
     MDString::get(VMContext, GetterName),
     MDString::get(VMContext, SetterName),
-    ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes)
+    ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
+    Ty
   };
   return DIObjCProperty(MDNode::get(VMContext, Elts));
 }
@@ -820,6 +826,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
                                        DIFile File, unsigned LineNo,
                                        DIType Ty,
                                        bool isLocalToUnit, bool isDefinition,
+                                       unsigned ScopeLine,
                                        unsigned Flags, bool isOptimized,
                                        Function *Fn,
                                        MDNode *TParams,
@@ -849,7 +856,8 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
     Fn,
     TParams,
     Decl,
-    THolder
+    THolder,
+    ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
@@ -897,7 +905,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
     Fn,
     TParam,
     llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
-    THolder
+    THolder,
+    // FIXME: Do we want to use different scope lines?
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
   };
   MDNode *Node = MDNode::get(VMContext, Elts);
   return DISubprogram(Node);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index e30c0a9..f61a8f3 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -291,7 +291,7 @@ bool DIDescriptor::isEnumerator() const {
 /// isObjCProperty - Return true if the specified tag is DW_TAG
 bool DIDescriptor::isObjCProperty() const {
-  return DbgNode && getTag() == dwarf::DW_TAG_APPLE_Property;
+  return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
 }
 //===----------------------------------------------------------------------===//
 // Simple Descriptor Constructors and other Methods
@@ -377,6 +377,19 @@ bool DICompileUnit::Verify() const {
   return true;
 }
 
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+  if (!DbgNode)
+    return false;
+  unsigned Tag = getTag();
+  if (Tag != dwarf::DW_TAG_APPLE_property) return false;
+  DIType Ty = getType();
+  if (!Ty.Verify()) return false;
+
+  // Don't worry about the rest of the strings for now.
+  return true;
+}
+
 /// Verify - Verify that a type descriptor is well formed.
 bool DIType::Verify() const {
   if (!DbgNode)
@@ -774,6 +787,9 @@ void DISubprogram::print(raw_ostream &OS) const {
   if (isDefinition())
     OS << " [def] ";
 
+  if (getScopeLineNumber() != getLineNumber())
+    OS << " [Scope: " << getScopeLineNumber() << "] ";
+
   OS << "\n";
 }
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index b226d66..c1d8e3e 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -467,6 +468,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
         } else if (isMalloc(&cast<Instruction>(*II)) ||
                    isFreeCall(&cast<Instruction>(*II))) {
           FunctionEffect |= ModRef;
+        } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
+          // The callgraph doesn't include intrinsic calls.
+          Function *Callee = Intrinsic->getCalledFunction();
+          ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
+          FunctionEffect |= (Behaviour & ModRef);
         }
 
     if ((FunctionEffect & Mod) == 0)
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 463584d..b80966b 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -107,11 +107,11 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
   return true;
 }
 
-/// AddUsersIfInteresting - Inspect the specified instruction.  If it is a
+/// AddUsersImpl - Inspect the specified instruction.  If it is a
 /// reducible SCEV, recursively add its users to the IVUsesByStride set and
 /// return true.  Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I,
-                                    SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+bool IVUsers::AddUsersImpl(Instruction *I,
+                           SmallPtrSet<Loop*,16> &SimpleLoopNests) {
   // Add this IV user to the Processed set before returning false to ensure that
   // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
   if (!Processed.insert(I))
@@ -167,13 +167,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I,
     bool AddUserToIVUsers = false;
     if (LI->getLoopFor(User->getParent()) != L) {
       if (isa<PHINode>(User) || Processed.count(User) ||
-          !AddUsersIfInteresting(User, SimpleLoopNests)) {
+          !AddUsersImpl(User, SimpleLoopNests)) {
         DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
                      << "   OF SCEV: " << *ISE << '\n');
         AddUserToIVUsers = true;
       }
-    } else if (Processed.count(User)
-               || !AddUsersIfInteresting(User, SimpleLoopNests)) {
+    } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
       DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
                    << "   OF SCEV: " << *ISE << '\n');
       AddUserToIVUsers = true;
@@ -197,6 +196,15 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I,
   return true;
 }
 
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+  // SCEVExpander can only handle users that are dominated by simplified loop
+  // entries. Keep track of all loops that are only dominated by other simple
+  // loops so we don't traverse the domtree for each user.
+  SmallPtrSet<Loop*,16> SimpleLoopNests;
+
+  return AddUsersImpl(I, SimpleLoopNests);
+}
+
 IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
   IVUses.push_back(new IVStrideUse(this, User, Operand));
   return IVUses.back();
@@ -222,16 +230,11 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   SE = &getAnalysis<ScalarEvolution>();
   TD = getAnalysisIfAvailable<TargetData>();
 
-  // SCEVExpander can only handle users that are dominated by simplified loop
-  // entries. Keep track of all loops that are only dominated by other simple
-  // loops so we don't traverse the domtree for each user.
-  SmallPtrSet<Loop*,16> SimpleLoopNests;
-
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
   // this loop.  If they are induction variables, inspect their uses.
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
-    (void)AddUsersIfInteresting(I, SimpleLoopNests);
+    (void)AddUsersIfInteresting(I);
 
   return false;
 }
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dedbfeb..3e3d2ab 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,659 +11,1012 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "inline-cost"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/CallingConv.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
-    const CodeMetrics &Metrics, Value *V) {
-  unsigned Reduction = 0;
-  SmallVector<Value *, 4> Worklist;
-  Worklist.push_back(V);
-  do {
-    Value *V = Worklist.pop_back_val();
-    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-      User *U = *UI;
-      if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
-        // We will be able to eliminate all but one of the successors.
-        const TerminatorInst &TI = cast<TerminatorInst>(*U);
-        const unsigned NumSucc = TI.getNumSuccessors();
-        unsigned Instrs = 0;
-        for (unsigned I = 0; I != NumSucc; ++I)
-          Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
-        // We don't know which blocks will be eliminated, so use the average size.
-        Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
-        continue;
-      }
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+  typedef InstVisitor<CallAnalyzer, bool> Base;
+  friend class InstVisitor<CallAnalyzer, bool>;
+
+  // TargetData if available, or null.
+  const TargetData *const TD;
+
+  // The called function.
+  Function &F;
+
+  int Threshold;
+  int Cost;
+  const bool AlwaysInline;
+
+  bool IsRecursive;
+  bool ExposesReturnsTwice;
+  bool HasDynamicAlloca;
+  unsigned NumInstructions, NumVectorInstructions;
+  int FiftyPercentVectorBonus, TenPercentVectorBonus;
+  int VectorBonus;
+
+  // While we walk the potentially-inlined instructions, we build up and
+  // maintain a mapping of simplified values specific to this callsite. The
+  // idea is to propagate any special information we have about arguments to
+  // this call through the inlinable section of the function, and account for
+  // likely simplifications post-inlining. The most important aspect we track
+  // is CFG altering simplifications -- when we prove a basic block dead, that
+  // can cause dramatic shifts in the cost of inlining a function.
+  DenseMap<Value *, Constant *> SimplifiedValues;
+
+  // Keep track of the values which map back (through function arguments) to
+  // allocas on the caller stack which could be simplified through SROA.
+  DenseMap<Value *, Value *> SROAArgValues;
+
+  // The mapping of caller Alloca values to their accumulated cost savings. If
+  // we have to disable SROA for one of the allocas, this tells us how much
+  // cost must be added.
+  DenseMap<Value *, int> SROAArgCosts;
+
+  // Keep track of values which map to a pointer base and constant offset.
+  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+  // Custom simplification helper routines.
+  bool isAllocaDerivedArg(Value *V);
+  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+                            DenseMap<Value *, int>::iterator &CostIt);
+  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+  void disableSROA(Value *V);
+  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+                          int InstructionCost);
+  bool handleSROACandidate(bool IsSROAValid,
+                           DenseMap<Value *, int>::iterator CostIt,
+                           int InstructionCost);
+  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+  // Custom analysis routines.
+  bool analyzeBlock(BasicBlock *BB);
+
+  // Disable several entry points to the visitor so we don't accidentally use
+  // them by declaring but not defining them here.
+  void visit(Module *);     void visit(Module &);
+  void visit(Function *);   void visit(Function &);
+  void visit(BasicBlock *); void visit(BasicBlock &);
+
+  // Provide base case for our instruction visit.
+  bool visitInstruction(Instruction &I);
+
+  // Our visit overrides.
+  bool visitAlloca(AllocaInst &I);
+  bool visitPHI(PHINode &I);
+  bool visitGetElementPtr(GetElementPtrInst &I);
+  bool visitBitCast(BitCastInst &I);
+  bool visitPtrToInt(PtrToIntInst &I);
+  bool visitIntToPtr(IntToPtrInst &I);
+  bool visitCastInst(CastInst &I);
+  bool visitUnaryInstruction(UnaryInstruction &I);
+  bool visitICmp(ICmpInst &I);
+  bool visitSub(BinaryOperator &I);
+  bool visitBinaryOperator(BinaryOperator &I);
+  bool visitLoad(LoadInst &I);
+  bool visitStore(StoreInst &I);
+  bool visitCallSite(CallSite CS);
+
+public:
+  CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+    : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+      AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+      IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+      NumInstructions(0), NumVectorInstructions(0),
+      FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+      NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+      NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+      NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+  }
 
-      // Figure out if this instruction will be removed due to simple constant
-      // propagation.
-      Instruction &Inst = cast<Instruction>(*U);
-
-      // We can't constant propagate instructions which have effects or
-      // read memory.
-      //
-      // FIXME: It would be nice to capture the fact that a load from a
-      // pointer-to-constant-global is actually a *really* good thing to zap.
-      // Unfortunately, we don't know the pointer that may get propagated here,
-      // so we can't make this decision.
-      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst))
-        continue;
+  bool analyzeCall(CallSite CS);
 
-      bool AllOperandsConstant = true;
-      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
-        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
-          AllOperandsConstant = false;
-          break;
-        }
-      if (!AllOperandsConstant)
-        continue;
+  int getThreshold() { return Threshold; }
+  int getCost() { return Cost; }
 
-      // We will get to remove this instruction...
-      Reduction += InlineConstants::InstrCost;
+  // Keep a bunch of stats about the cost savings found so we can print them
+  // out when debugging.
+  unsigned NumConstantArgs;
+  unsigned NumConstantOffsetPtrArgs;
+  unsigned NumAllocaArgs;
+  unsigned NumConstantPtrCmps;
+  unsigned NumConstantPtrDiffs;
+  unsigned NumInstructionsSimplified;
+  unsigned SROACostSavings;
+  unsigned SROACostSavingsLost;
 
-      // And any other instructions that use it which become constants
-      // themselves.
-      Worklist.push_back(&Inst);
-    }
-  } while (!Worklist.empty());
-  return Reduction;
-}
+  void dump();
+};
 
-static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
-                                                ICmpInst *ICI) {
-  unsigned Reduction = 0;
+} // namespace
 
-  // Bail if this is comparing against a non-constant; there is nothing we can
-  // do there.
-  if (!isa<Constant>(ICI->getOperand(1)))
-    return Reduction;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+  return SROAArgValues.count(V);
+}
 
-  // An icmp pred (alloca, C) becomes true if the predicate is true when
-  // equal and false otherwise.
-  bool Result = ICI->isTrueWhenEqual();
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+  if (SROAArgValues.empty() || SROAArgCosts.empty())
+    return false;
 
-  SmallVector<Instruction *, 4> Worklist;
-  Worklist.push_back(ICI);
-  do {
-    Instruction *U = Worklist.pop_back_val();
-    Reduction += InlineConstants::InstrCost;
-    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-         UI != UE; ++UI) {
-      Instruction *I = dyn_cast<Instruction>(*UI);
-      if (!I || I->mayHaveSideEffects()) continue;
-      if (I->getNumOperands() == 1)
-        Worklist.push_back(I);
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-        // If BO produces the same value as U, then the other operand is
-        // irrelevant and we can put it into the Worklist to continue
-        // deleting dead instructions. If BO produces the same value as the
-        // other operand, we can delete BO but that's it.
-        if (Result == true) {
-          if (BO->getOpcode() == Instruction::Or)
-            Worklist.push_back(I);
-          if (BO->getOpcode() == Instruction::And)
-            Reduction += InlineConstants::InstrCost;
-        } else {
-          if (BO->getOpcode() == Instruction::Or ||
-              BO->getOpcode() == Instruction::Xor)
-            Reduction += InlineConstants::InstrCost;
-          if (BO->getOpcode() == Instruction::And)
-            Worklist.push_back(I);
-        }
-      }
-      if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-        BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
-        if (BB->getSinglePredecessor())
-          Reduction
-            += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
-      }
-    }
-  } while (!Worklist.empty());
+  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+  if (ArgIt == SROAArgValues.end())
+    return false;
 
-  return Reduction;
+  Arg = ArgIt->second;
+  CostIt = SROAArgCosts.find(Arg);
+  return CostIt != SROAArgCosts.end();
 }
 
-/// \brief Compute the reduction possible for a given instruction if we are able
-/// to SROA an alloca.
+/// \brief Disable SROA for the candidate marked by this cost iterator.
 ///
-/// The reduction for this instruction is added to the SROAReduction output
-/// parameter. Returns false if this instruction is expected to defeat SROA in
-/// general.
-static bool countCodeReductionForSROAInst(Instruction *I,
-                                          SmallVectorImpl<Value *> &Worklist,
-                                          unsigned &SROAReduction) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    if (!LI->isSimple())
-      return false;
-    SROAReduction += InlineConstants::InstrCost;
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+  // If we're no longer able to perform SROA we need to undo its cost savings
+  // and prevent subsequent analysis.
+  Cost += CostIt->second;
+  SROACostSavings -= CostIt->second;
+  SROACostSavingsLost += CostIt->second;
+  SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+    disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+                                      int InstructionCost) {
+  CostIt->second += InstructionCost;
+  SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+                                       DenseMap<Value *, int>::iterator CostIt,
+                                       int InstructionCost) {
+  if (IsSROAValid) {
+    accumulateSROACost(CostIt, InstructionCost);
     return true;
   }
 
-  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-    if (!SI->isSimple())
+  disableSROA(CostIt);
+  return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
       return false;
-    SROAReduction += InlineConstants::InstrCost;
-    return true;
-  }
 
+  return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+  if (!TD)
+    return false;
+
+  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  assert(IntPtrWidth == Offset.getBitWidth());
+
+  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+    if (!OpC)
+      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+        OpC = dyn_cast<ConstantInt>(SimpleOp);
+    if (!OpC)
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-    // If the GEP has variable indices, we won't be able to do much with it.
-    if (!GEP->hasAllConstantIndices())
       return false;
-    // A non-zero GEP will likely become a mask operation after SROA.
-    if (GEP->hasAllZeroIndices())
-      SROAReduction += InlineConstants::InstrCost;
-    Worklist.push_back(GEP);
-    return true;
-  }
+    if (OpC->isZero()) continue;
 
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-    // Track pointer through bitcasts.
-    Worklist.push_back(BCI);
-    SROAReduction += InlineConstants::InstrCost;
-    return true;
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = TD->getStructLayout(STy);
+      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+      continue;
+    }
+
+    APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
+  return true;
+}
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+  // FIXME: Check whether inlining will turn a dynamic alloca into a static
+  // alloca, and handle that case.
+
+  // We will happily inline static alloca instructions or dynamic alloca
+  // instructions in always-inline situations.
+  if (AlwaysInline || I.isStaticAlloca())
+    return Base::visitAlloca(I);
+
+  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+  // a variety of reasons, and so we would like to not inline them into
+  // functions which don't currently have a dynamic alloca. This simply
+  // disables inlining altogether in the presence of a dynamic alloca.
+  HasDynamicAlloca = true;
+  return false;
+}
 
-  // We just look for non-constant operands to ICmp instructions as those will
-  // defeat SROA. The actual reduction for these happens even without SROA.
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
-    return isa<Constant>(ICI->getOperand(1));
-
-  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-    // SROA can handle a select of alloca iff all uses of the alloca are
-    // loads, and dereferenceable. We assume it's dereferenceable since
-    // we're told the input is an alloca.
-    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
-         UI != UE; ++UI) {
-      LoadInst *LI = dyn_cast<LoadInst>(*UI);
-      if (LI == 0 || !LI->isSimple())
+bool CallAnalyzer::visitPHI(PHINode &I) {
+  // FIXME: We should potentially be tracking values through phi nodes,
+  // especially when they collapse to a single value due to deleted CFG edges
+  // during inlining.
+
+  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+  // though we don't want to propagate its bonuses. The idea is to disable
+  // SROA if it *might* be used in an inappropriate manner.
+
+  // Phi nodes are always zero-cost.
+  return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+                                            SROAArg, CostIt);
+
+  // Try to fold GEPs of constant-offset call site argument pointers. This
+  // requires target data and inbounds GEPs.
+  if (TD && I.isInBounds()) {
+    // Check if we have a base + offset for the pointer.
+    Value *Ptr = I.getPointerOperand();
+    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+    if (BaseAndOffset.first) {
+      // Check if the offset of this GEP is constant, and if so accumulate it
+      // into Offset.
+      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+        // Non-constant GEPs aren't folded, and disable SROA.
+        if (SROACandidate)
+          disableSROA(CostIt);
         return false;
+      }
+
+      // Add the result as a new mapping to Base + Offset.
+      ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+      // Also handle SROA candidates here, we already know that the GEP is
+      // all-constant indexed.
+      if (SROACandidate)
+        SROAArgValues[&I] = SROAArg;
+
+      return true;
     }
-    // We don't know whether we'll be deleting the rest of the chain of
-    // instructions from the SelectInst on, because we don't know whether
-    // the other side of the select is also an alloca or not.
+  }
+
+  if (isGEPOffsetConstant(I)) {
+    if (SROACandidate)
+      SROAArgValues[&I] = SROAArg;
+
+    // Constant GEPs are modeled as free.
     return true;
   }
 
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-    switch (II->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::memset:
-    case Intrinsic::memcpy:
-    case Intrinsic::memmove:
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-      // SROA can usually chew through these intrinsics.
-      SROAReduction += InlineConstants::InstrCost;
+  // Variable GEPs will require math and will disable SROA.
+  if (SROACandidate)
+    disableSROA(CostIt);
+  return false;
+}
+
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+  // Propagate constants through bitcasts.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+      SimplifiedValues[&I] = C;
      return true;
    }
+
+  // Track base/offsets through casts
+  std::pair<Value *, APInt> BaseAndOffset
+    = ConstantOffsetPtrs.lookup(I.getOperand(0));
+  // Casts don't change the offset, just wrap it up.
+  if (BaseAndOffset.first)
+    ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+  // Also look for SROA candidates here.
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+    SROAArgValues[&I] = SROAArg;
+
+  // Bitcasts are always zero cost.
+  return true;
+}
+
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+  // Propagate constants through ptrtoint.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+      SimplifiedValues[&I] = C;
+      return true;
+    }
+
+  // Track base/offset pairs when converted to a plain integer provided the
+  // integer is large enough to represent the pointer.
+  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+  if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+    std::pair<Value *, APInt> BaseAndOffset
+      = ConstantOffsetPtrs.lookup(I.getOperand(0));
+    if (BaseAndOffset.first)
+      ConstantOffsetPtrs[&I] = BaseAndOffset;
+  }
 
-  // If there is some other strange instruction, we're not going to be
-  // able to do much if we inline this.
-  return false;
+  // This is really weird. Technically, ptrtoint will disable SROA. However,
+  // unless that ptrtoint is *used* somewhere in the live basic blocks after
+  // inlining, it will be nuked, and SROA should proceed. All of the uses which
+  // would block SROA would also block SROA if applied directly to a pointer,
+  // and so we can just add the integer in here. The only places where SROA is
+  // preserved either cannot fire on an integer, or won't in-and-of themselves
+  // disable SROA (ext) w/o some later use that we would see and disable.
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+    SROAArgValues[&I] = SROAArg;
+
+  // A ptrtoint cast is free so long as the result is large enough to store the
+  // pointer, and a legal integer type.
+  return TD && TD->isLegalInteger(IntegerSize) &&
+    IntegerSize >= TD->getPointerSizeInBits();
 }
 
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
-    const CodeMetrics &Metrics, Value *V) {
-  if (!V->getType()->isPointerTy()) return 0;  // Not a pointer
-  unsigned Reduction = 0;
-  unsigned SROAReduction = 0;
-  bool CanSROAAlloca = true;
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+  // Propagate constants through inttoptr.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+      SimplifiedValues[&I] = C;
+      return true;
+    }
 
-  SmallVector<Value *, 4> Worklist;
-  Worklist.push_back(V);
-  do {
-    Value *V = Worklist.pop_back_val();
-    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-         UI != E; ++UI){
-      Instruction *I = cast<Instruction>(*UI);
+  // Track base/offset pairs when round-tripped through a pointer without
+  // modifications provided the integer is not too large.
+  Value *Op = I.getOperand(0);
+  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+  if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+    if (BaseAndOffset.first)
+      ConstantOffsetPtrs[&I] = BaseAndOffset;
+  }
 
-      if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
-        Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+    SROAArgValues[&I] = SROAArg;
 
-      if (CanSROAAlloca)
-        CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
-                                                      SROAReduction);
+  // An inttoptr cast is free so long as the input is a legal integer type
+  // which doesn't contain values outside the range of a pointer.
+  return TD && TD->isLegalInteger(IntegerSize) &&
+    IntegerSize <= TD->getPointerSizeInBits();
+}
+
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+  // Propagate constants through casts.
+  if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+    if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+      SimplifiedValues[&I] = C;
+      return true;
     }
-  } while (!Worklist.empty());
 
-  return Reduction + (CanSROAAlloca ? SROAReduction : 0);
+  // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+  disableSROA(I.getOperand(0));
+
+  // No-op casts don't have any cost.
+  if (I.isLosslessCast())
+    return true;
+
+  // trunc to a native type is free (assuming the target has compare and
+  // shift-right of the same width).
+  if (TD && isa<TruncInst>(I) &&
+      TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
+    return true;
+
+  // Result of a cmp instruction is often extended (to be used by other
+  // cmp instructions, logical or return instructions). These are usually
+  // no-ops on most sane targets.
+  if (isa<CmpInst>(I.getOperand(0)))
+    return true;
+
+  // Assume the rest of the casts require work.
+  return false;
 }
 
-void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
-    const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
-    Value *V, unsigned ArgIdx) {
-  SmallVector<Value *, 4> Worklist;
-  Worklist.push_back(V);
-  do {
-    Value *V = Worklist.pop_back_val();
-    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-         UI != E; ++UI){
-      Instruction *I = cast<Instruction>(*UI);
-
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-        // If the GEP has variable indices, we won't be able to do much with it.
-        if (!GEP->hasAllConstantIndices())
-          continue;
-        // Unless the GEP is in-bounds, some comparisons will be non-constant.
-        // Fortunately, the real-world cases where this occurs uses in-bounds
-        // GEPs, and so we restrict the optimization to them here.
-        if (!GEP->isInBounds())
-          continue;
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+  Value *Operand = I.getOperand(0);
+  Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
+  if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
+    if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+                                               Ops, TD)) {
+      SimplifiedValues[&I] = C;
+      return true;
+    }
 
-        // Constant indices just change the constant offset. Add the resulting
-        // value both to our worklist for this argument, and to the set of
-        // viable paired values with future arguments.
-        PointerArgs[GEP] = ArgIdx;
-        Worklist.push_back(GEP);
-        continue;
+  // Disable any SROA on the argument to arbitrary unary operators.
+  disableSROA(Operand);
+
+  return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  // First try to handle simplified comparisons.
+  if (!isa<Constant>(LHS))
+    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+      LHS = SimpleLHS;
+  if (!isa<Constant>(RHS))
+    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+      RHS = SimpleRHS;
+  if (Constant *CLHS = dyn_cast<Constant>(LHS))
+    if (Constant *CRHS = dyn_cast<Constant>(RHS))
+      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+        SimplifiedValues[&I] = C;
+        return true;
      }
 
-      // Track pointer through casts. Even when the result is not a pointer, it
-      // remains a constant relative to constants derived from other constant
-      // pointers.
-      if (CastInst *CI = dyn_cast<CastInst>(I)) {
-        PointerArgs[CI] = ArgIdx;
-        Worklist.push_back(CI);
-        continue;
+  // Otherwise look for a comparison between constant offset pointers with
+  // a common base.
+  Value *LHSBase, *RHSBase;
+  APInt LHSOffset, RHSOffset;
+  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+  if (LHSBase) {
+    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+    if (RHSBase && LHSBase == RHSBase) {
+      // We have common bases, fold the icmp to a constant based on the
+      // offsets.
+      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+        SimplifiedValues[&I] = C;
+        ++NumConstantPtrCmps;
+        return true;
      }
+    }
+  }
 
-      // There are two instructions which produce a strict constant value when
-      // applied to two related pointer values. Ignore everything else.
-      if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
-        continue;
-      assert(I->getNumOperands() == 2);
-
-      // Ensure that the two operands are in our set of potentially paired
-      // pointers (or are derived from them).
-      Value *OtherArg = I->getOperand(0);
-      if (OtherArg == V)
-        OtherArg = I->getOperand(1);
-      DenseMap<Value *, unsigned>::const_iterator ArgIt
-        = PointerArgs.find(OtherArg);
-      if (ArgIt == PointerArgs.end())
-        continue;
-      std::pair<unsigned, unsigned> ArgPair(ArgIt->second, ArgIdx);
-      if (ArgPair.first > ArgPair.second)
-        std::swap(ArgPair.first, ArgPair.second);
+  // If the comparison is an equality comparison with null, we can simplify it
+  // for any alloca-derived argument.
+  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+    if (isAllocaDerivedArg(I.getOperand(0))) {
+      // We can actually predict the result of comparisons between an
+      // alloca-derived value and null. Note that this fires regardless of
+      // SROA firing.
+      bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+      SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+                                        : ConstantInt::getFalse(I.getType());
+      return true;
+    }
 
-      PointerArgPairWeights[ArgPair]
-        += countCodeReductionForConstant(Metrics, I);
+  // Finally check for SROA candidates in comparisons.
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+    if (isa<ConstantPointerNull>(I.getOperand(1))) {
+      accumulateSROACost(CostIt, InlineConstants::InstrCost);
+      return true;
     }
-  } while (!Worklist.empty());
-}
 
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
-                                                       const TargetData *TD) {
-  Metrics.analyzeFunction(F, TD);
-
-  // A function with exactly one return has it removed during the inlining
-  // process (see InlineFunction), so don't count it.
-  // FIXME: This knowledge should really be encoded outside of FunctionInfo.
-  if (Metrics.NumRets==1)
-    --Metrics.NumInsts;
-
-  ArgumentWeights.reserve(F->arg_size());
-  DenseMap<Value *, unsigned> PointerArgs;
-  unsigned ArgIdx = 0;
-  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
-       ++I, ++ArgIdx) {
-    // Count how much code can be eliminated if one of the arguments is
-    // a constant or an alloca.
-    ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
-                                      countCodeReductionForAlloca(Metrics, I)));
-
-    // If the argument is a pointer, also check for pairs of pointers where
-    // knowing a fixed offset between them allows simplification. This pattern
-    // arises mostly due to STL algorithm patterns where pointers are used as
-    // random access iterators.
-    if (!I->getType()->isPointerTy())
-      continue;
-    PointerArgs[I] = ArgIdx;
-    countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
  }
-}
 
-/// NeverInline - returns true if the function should never be inlined into
-/// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
-  return (Metrics.exposesReturnsTwice || Metrics.isRecursive ||
-          Metrics.containsIndirectBr);
+  return false;
}

-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function. We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+  // Try to handle a special case: we can fold computing the difference of two
+  // constant-related pointers.
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Value *LHSBase, *RHSBase;
+  APInt LHSOffset, RHSOffset;
+  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+  if (LHSBase) {
+    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+    if (RHSBase && LHSBase == RHSBase) {
+      // We have common bases, fold the subtract to a constant based on the
+      // offsets.
+      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+        SimplifiedValues[&I] = C;
+        ++NumConstantPtrDiffs;
+        return true;
+      }
+    }
+  }
+
+  // Otherwise, fall back to the generic logic for simplifying and handling
+  // instructions.
+  return Base::visitSub(I);
+}
 
-  // This could just be NULL.
-  if (!C) return 0;
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  if (!isa<Constant>(LHS))
+    if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+      LHS = SimpleLHS;
+  if (!isa<Constant>(RHS))
+    if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+      RHS = SimpleRHS;
+  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+    SimplifiedValues[&I] = C;
+    return true;
+  }
 
-  Function *F = dyn_cast<Function>(C);
-  if (!F) return 0;
+  // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+  disableSROA(LHS);
+  disableSROA(RHS);
 
-  int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
-  return (Bonus > 0) ? 0 : Bonus;
-}
+  return false;
+}
 
-// CountBonusForConstant - Figure out an approximation for how much per-call
-// performance boost we can expect if the specified value is constant.
-int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
-  unsigned Bonus = 0;
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-    User *U = *UI;
-    if (CallInst *CI = dyn_cast<CallInst>(U)) {
-      // Turning an indirect call into a direct call is a BIG win
-      if (CI->getCalledValue() == V)
-        Bonus += ConstantFunctionBonus(CallSite(CI), C);
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
-      // Turning an indirect call into a direct call is a BIG win
-      if (II->getCalledValue() == V)
-        Bonus += ConstantFunctionBonus(CallSite(II), C);
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+    if (I.isSimple()) {
+      accumulateSROACost(CostIt, InlineConstants::InstrCost);
+      return true;
     }
-    // FIXME: Eliminating conditional branches and switches should
-    // also yield a per-call performance boost.
-    else {
-      // Figure out the bonuses that will accrue due to simple constant
-      // propagation.
-      Instruction &Inst = cast<Instruction>(*U);
-
-      // We can't constant propagate instructions which have effects or
-      // read memory.
-      //
-      // FIXME: It would be nice to capture the fact that a load from a
-      // pointer-to-constant-global is actually a *really* good thing to zap.
-      // Unfortunately, we don't know the pointer that may get propagated here,
-      // so we can't make this decision.
-      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst))
-        continue;
 
-      bool AllOperandsConstant = true;
-      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
-        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
-          AllOperandsConstant = false;
-          break;
-        }
+    disableSROA(CostIt);
+  }
 
-      if (AllOperandsConstant)
-        Bonus += CountBonusForConstant(&Inst);
+  return false;
+}
+
+bool CallAnalyzer::visitStore(StoreInst &I) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+    if (I.isSimple()) {
+      accumulateSROACost(CostIt, InlineConstants::InstrCost);
+      return true;
    }
+
+    disableSROA(CostIt);
  }

-  return Bonus;
+  return false;
}

-int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
-
-  // InlineCost - This value measures how good of an inline candidate this call
-  // site is to inline. A lower inline cost make is more likely for the call to
-  // be inlined. This value may go negative.
-  //
-  int InlineCost = 0;
-
-  // Compute any size reductions we can expect due to arguments being passed into
-  // the function.
-  //
-  unsigned ArgNo = 0;
-  CallSite::arg_iterator I = CS.arg_begin();
-  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
-       FI != FE; ++I, ++FI, ++ArgNo) {
-
-    // If an alloca is passed in, inlining this function is likely to allow
-    // significant future optimization possibilities (like scalar promotion, and
-    // scalarization), so encourage the inlining of the function.
-    //
-    if (isa<AllocaInst>(I))
-      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
-    // If this is a constant being passed into the function, use the argument
-    // weights calculated for the callee to determine how much will be folded
-    // away with this information.
-    else if (isa<Constant>(I))
-      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+bool CallAnalyzer::visitCallSite(CallSite CS) {
+  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+      !F.hasFnAttr(Attribute::ReturnsTwice)) {
+    // This aborts the entire analysis.
+    ExposesReturnsTwice = true;
+    return false;
+  }
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return Base::visitCallSite(CS);
+
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+      // SROA can usually chew through these intrinsics and they have no cost
+      // so don't pay the price of analyzing them in detail.
+      return true;
+    }
  }

-  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights
-    = CalleeFI->PointerArgPairWeights;
-  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I
-         = ArgPairWeights.begin(), E = ArgPairWeights.end();
-       I != E; ++I)
-    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
-        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
-      InlineCost -= I->second;
+  if (Function *F = CS.getCalledFunction()) {
+    if (F == CS.getInstruction()->getParent()->getParent()) {
+      // This flag will fully abort the analysis, so don't bother with anything
+      // else.
+      IsRecursive = true;
+      return false;
+    }
 
-  // Each argument passed in has a cost at both the caller and the callee
-  // sides. Measurements show that each argument costs about the same as an
-  // instruction.
-  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+    if (!callIsSmall(F)) {
+      // We account for the average 1 instruction per call argument setup
+      // here.
+      Cost += CS.arg_size() * InlineConstants::InstrCost;
 
-  // Now that we have considered all of the factors that make the call site more
-  // likely to be inlined, look at factors that make us not want to inline it.
+      // Everything other than inline ASM will also have a significant cost
+      // merely from making the call.
+      if (!isa<InlineAsm>(CS.getCalledValue()))
+        Cost += InlineConstants::CallPenalty;
+    }
 
-  // Calls usually take a long time, so they make the inlining gain smaller.
-  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+    return Base::visitCallSite(CS);
+  }
 
-  // Look at the size of the callee. Each instruction counts as 5.
-  InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+  // Otherwise we're in a very special case -- an indirect function call. See
+  // if we can be particularly clever about this.
+  Value *Callee = CS.getCalledValue();
+
+  // First, pay the price of the argument setup. We account for the average
+  // 1 instruction per call argument setup here.
+  Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+  // Next, check if this happens to be an indirect function call to a known
+  // function in this inline context. If not, we've done all we can.
+  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+  if (!F)
+    return Base::visitCallSite(CS);
+
+  // If we have a constant that we are calling as a function, we can peer
+  // through it and see the function target. This happens not infrequently
+  // during devirtualization and so we want to give it a hefty bonus for
+  // inlining, but cap that bonus in the event that inlining wouldn't pan
+  // out. Pretend to inline the function, with a custom threshold.
+  CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+  if (CA.analyzeCall(CS)) {
+    // We were able to inline the indirect call! Subtract the cost from the
+    // bonus we want to apply, but don't go below zero.
+    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+  }
 
-  return InlineCost;
+  return Base::visitCallSite(CS);
}

-int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
-  // Get information about the callee.
-  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
-
-  bool isDirectCall = CS.getCalledFunction() == Callee;
-  Instruction *TheCall = CS.getInstruction();
-  int Bonus = 0;
-
-  // If there is only one call of the function, and it has internal linkage,
-  // make it almost guaranteed to be inlined.
-  //
-  if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
-    Bonus += InlineConstants::LastCallToStaticBonus;
-
-  // If the instruction after the call, or if the normal destination of the
-  // invoke is an unreachable instruction, the function is noreturn. As such,
-  // there is little point in inlining this.
-  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
-    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
-      Bonus += InlineConstants::NoreturnPenalty;
-  } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
-    Bonus += InlineConstants::NoreturnPenalty;
-
-  // If this function uses the coldcc calling convention, prefer not to inline
-  // it.
-  if (Callee->getCallingConv() == CallingConv::Cold)
-    Bonus += InlineConstants::ColdccPenalty;
-
-  // Add to the inline quality for properties that make the call valuable to
-  // inline. This includes factors that indicate that the result of inlining
-  // the function will be optimizable. Currently this just looks at arguments
-  // passed into the function.
-  //
-  CallSite::arg_iterator I = CS.arg_begin();
-  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
-       FI != FE; ++I, ++FI)
-    // Compute any constant bonus due to inlining we want to give here.
-    if (isa<Constant>(I))
-      Bonus += CountBonusForConstant(FI, cast<Constant>(I));
-
-  return Bonus;
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+  // We found something we don't understand or can't handle. Mark any SROA-able
+  // values in the operand list as no longer viable.
+  for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+    disableSROA(*OI);
+
+  return false;
}

-// getInlineCost - The heuristic used to determine if we should inline the
-// function call or not.
-//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS) {
-  return getInlineCost(CS, CS.getCalledFunction());
+
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
It +/// aborts early if the threshold has been exceeded or an impossible to inline +/// construct has been detected. It returns false if inlining is no longer +/// viable, and true if inlining remains viable. +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); + I != E; ++I) { + ++NumInstructions; + if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy()) + ++NumVectorInstructions; + + // If the instruction simplified to a constant, there is no cost to this + // instruction. Visit the instructions using our InstVisitor to account for + // all of the per-instruction logic. The visit tree returns true if we + // consumed the instruction in any way, and false if the instruction's base + // cost should count against inlining. + if (Base::visit(I)) + ++NumInstructionsSimplified; + else + Cost += InlineConstants::InstrCost; + + // If the visit this instruction detected an uninlinable pattern, abort. + if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + if (NumVectorInstructions > NumInstructions/2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions/10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + + // Check if we've past the threshold so we don't spin in huge basic + // blocks that will never inline. + if (!AlwaysInline && Cost > (Threshold + VectorBonus)) + return false; + } + + return true; } -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee) { - Instruction *TheCall = CS.getInstruction(); - Function *Caller = TheCall->getParent()->getParent(); +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { + if (!TD || !V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet<Value *, 4> Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) + return 0; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast<Operator>(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) || - CS.isNoInline()) - return llvm::InlineCost::getNever(); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset)); +} - // Get information about the callee. - FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; +/// \brief Analyze a call site for potential inlining. 
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+  ++NumCallsAnalyzed;
+
+  // Track whether the post-inlining function would have more than one basic
+  // block. A single basic block is often intended for inlining. Balloon the
+  // threshold by 50% until we pass the single-BB phase.
+  bool SingleBB = true;
+  int SingleBBBonus = Threshold / 2;
+  Threshold += SingleBBBonus;
+
+  // Unless we are always-inlining, perform some tweaks to the cost and
+  // threshold based on the direct callsite information.
+  if (!AlwaysInline) {
+    // We want to more aggressively inline vector-dense kernels, so up the
+    // threshold, and we'll lower it if the % of vector instructions gets too
+    // low.
+    assert(NumInstructions == 0);
+    assert(NumVectorInstructions == 0);
+    FiftyPercentVectorBonus = Threshold;
+    TenPercentVectorBonus = Threshold / 2;
+
+    // Subtract off one instruction per call argument as those will be free after
+    // inlining.
+    Cost -= CS.arg_size() * InlineConstants::InstrCost;
+
+    // If there is only one call of the function, and it has internal linkage,
+    // the cost of inlining it drops dramatically.
+    if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+      Cost += InlineConstants::LastCallToStaticBonus;
+
+    // If the instruction after the call, or if the normal destination of the
+    // invoke is an unreachable instruction, the function is noreturn. As such,
+    // there is little point in inlining this unless there is literally zero cost.
+    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+      if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+        Threshold = 1;
+    } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+      Threshold = 1;
+
+    // If this function uses the coldcc calling convention, prefer not to inline
+    // it.
+    if (F.getCallingConv() == CallingConv::Cold)
+      Cost += InlineConstants::ColdccPenalty;
+
+    // Check if we're done. This can happen due to bonuses and penalties.
+    if (Cost > Threshold)
+      return false;
+  }

-  if (CalleeFI->Metrics.NumBlocks == 0)
-    CalleeFI->analyzeFunction(Callee, TD);
+  if (F.empty())
+    return true;

-  // If we should never inline this, return a huge cost.
-  if (CalleeFI->NeverInline())
-    return InlineCost::getNever();
+  // Track whether we've seen a return instruction. The first return
+  // instruction is free, as at least one will usually disappear in inlining.
+  bool HasReturn = false;
+
+  // Populate our simplified values by mapping from function arguments to call
+  // arguments with known important simplifications.
+  CallSite::arg_iterator CAI = CS.arg_begin();
+  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+       FAI != FAE; ++FAI, ++CAI) {
+    assert(CAI != CS.arg_end());
+    if (Constant *C = dyn_cast<Constant>(CAI))
+      SimplifiedValues[FAI] = C;
+
+    Value *PtrArg = *CAI;
+    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+      // We can SROA any pointer arguments derived from alloca instructions.
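[Aside: an illustrative example of the argument mapping above, not part of this commit; the IR names and the 4-byte i32 size are assumptions.]

    // Given a call site such as:
    //   %a = alloca [8 x i32]
    //   %p = getelementptr inbounds [8 x i32]* %a, i64 0, i64 2
    //   call void @f(i32* %p)
    // stripAndComputeInBoundsConstantOffsets(%p) strips the inbounds GEP and
    // yields the base %a plus a constant offset of 8 bytes (2 * 4). The formal
    // argument therefore lands in ConstantOffsetPtrs, and because the stripped
    // base is an alloca it is also recorded as an SROA candidate just below.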
+      if (isa<AllocaInst>(PtrArg)) {
+        SROAArgValues[FAI] = PtrArg;
+        SROAArgCosts[PtrArg] = 0;
+      }
+    }
+  }
+  NumConstantArgs = SimplifiedValues.size();
+  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+  NumAllocaArgs = SROAArgValues.size();
+
+  // The worklist of live basic blocks in the callee *after* inlining. We avoid
+  // adding basic blocks of the callee which can be proven to be dead for this
+  // particular call site in order to get more accurate cost estimates. This
+  // requires a somewhat heavyweight iteration pattern: we need to walk the
+  // basic blocks in a breadth-first order as we insert live successors. To
+  // accomplish this, prioritizing for small iterations because we exit after
+  // crossing our threshold, we use a small-size optimized SetVector.
+  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+  BBSetVector BBWorklist;
+  BBWorklist.insert(&F.getEntryBlock());
+  // Note that we *must not* cache the size, this loop grows the worklist.
+  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+    // Bail out the moment we cross the threshold. This means we'll under-count
+    // the cost, but only when undercounting doesn't matter.
+    if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+      break;
+
+    BasicBlock *BB = BBWorklist[Idx];
+    if (BB->empty())
+      continue;

-  // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
-  // could move this up and avoid computing the FunctionInfo for
-  // things we are going to just return always inline for. This
-  // requires handling setjmp somewhere else, however.
-  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
-    return InlineCost::getAlways();
+    // Handle the terminator cost here where we can track returns and other
+    // function-wide constructs.
+    TerminatorInst *TI = BB->getTerminator();
+
+    // We never want to inline functions that contain an indirectbr. Inlining
+    // one would be incorrect because all the blockaddresses (in static global
+    // initializers for example) would be referring to the original function,
+    // and this indirect jump would jump from the inlined copy of the function
+    // into the original function, which is extremely undefined behavior.
+    // FIXME: This logic isn't really right; we can safely inline functions
+    // with indirectbr's as long as no other function or global references the
+    // blockaddress of a block within the current function. And as a QOI issue,
+    // if someone is using a blockaddress without an indirectbr, and that
+    // reference somehow ends up in another function or global, we probably
+    // don't want to inline this function.
+    if (isa<IndirectBrInst>(TI))
+      return false;

-  if (CalleeFI->Metrics.usesDynamicAlloca) {
-    // Get information about the caller.
-    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+    if (!HasReturn && isa<ReturnInst>(TI))
+      HasReturn = true;
+    else
+      Cost += InlineConstants::InstrCost;

-    // If we haven't calculated this information yet, do so now.
-    if (CallerFI.Metrics.NumBlocks == 0) {
-      CallerFI.analyzeFunction(Caller, TD);
+    // Analyze the cost of this block. If we blow through the threshold, this
+    // returns false, and we can bail out.
+    if (!analyzeBlock(BB)) {
+      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+        return false;
+      break;
+    }

-      // Recompute the CalleeFI pointer, getting Caller could have invalidated
-      // it.
-      CalleeFI = &CachedFunctionInfo[Callee];
+    // Add in the live successors by first checking whether we have a terminator
+    // that may be simplified based on the values simplified by this call.
+    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+      if (BI->isConditional()) {
+        Value *Cond = BI->getCondition();
+        if (ConstantInt *SimpleCond
+              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+          continue;
+        }
+      }
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+      Value *Cond = SI->getCondition();
+      if (ConstantInt *SimpleCond
+            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+        continue;
+      }
    }

-    // Don't inline a callee with dynamic alloca into a caller without them.
-    // Functions containing dynamic alloca's are inefficient in various ways;
-    // don't create more inefficiency.
-    if (!CallerFI.Metrics.usesDynamicAlloca)
-      return InlineCost::getNever();
+    // If we're unable to select a particular successor, just count all of
+    // them.
+    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+      BBWorklist.insert(TI->getSuccessor(TIdx));
+
+    // If we had any successors at this point, then post-inlining is likely to
+    // have them as well. Note that we assume any basic blocks which existed
+    // due to branches or switches which folded above will also fold after
+    // inlining.
+    if (SingleBB && TI->getNumSuccessors() > 1) {
+      // Take off the bonus we applied to the threshold.
+      Threshold -= SingleBBBonus;
+      SingleBB = false;
+    }
+  }

-  // InlineCost - This value measures how good of an inline candidate this call
-  // site is to inline. A lower inline cost make is more likely for the call to
-  // be inlined. This value may go negative due to the fact that bonuses
-  // are negative numbers.
-  //
-  int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
-  return llvm::InlineCost::get(InlineCost);
-}
+  Threshold += VectorBonus;

-// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-// higher threshold to determine if the function call should be inlined.
-float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
-  Function *Callee = CS.getCalledFunction();
-
-  // Get information about the callee.
-  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-
-  // If we haven't calculated this information yet, do so now.
-  if (CalleeFI.Metrics.NumBlocks == 0)
-    CalleeFI.analyzeFunction(Callee, TD);
-
-  float Factor = 1.0f;
-  // Single BB functions are often written to be inlined.
-  if (CalleeFI.Metrics.NumBlocks == 1)
-    Factor += 0.5f;
-
-  // Be more aggressive if the function contains a good chunk (if it mades up
-  // at least 10% of the instructions) of vector instructions.
-  if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
-    Factor += 2.0f;
-  else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
-    Factor += 1.5f;
-  return Factor;
+  return AlwaysInline || Cost < Threshold;
 }

-/// growCachedCostInfo - update the cached cost info for Caller after Callee has
-/// been inlined.
-void
-InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
-  CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); +#undef DEBUG_PRINT_STAT +} - // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { - resetCachedCostInfo(Caller); - return; - } +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); +} - // For large functions, we can save a lot of computation time by skipping - // recalculations. - if (CallerMetrics.NumCalls > 0) - --CallerMetrics.NumCalls; +InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. + if (!Callee || Callee->mayBeOverridden() || + Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline()) + return llvm::InlineCost::getNever(); - if (Callee == 0) return; + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics; + CallAnalyzer CA(TD, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); - // If we don't have metrics for the callee, don't recalculate them just to - // update an approximation in the caller. Instead, just recalculate the - // caller info from scratch. - if (CalleeMetrics.NumBlocks == 0) { - resetCachedCostInfo(Caller); - return; - } + DEBUG(CA.dump()); - // Since CalleeMetrics were already calculated, we know that the CallerMetrics - // reference isn't invalidated: both were in the DenseMap. - CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca; - - // FIXME: If any of these three are true for the callee, the callee was - // not inlined into the caller, so I think they're redundant here. - CallerMetrics.exposesReturnsTwice |= CalleeMetrics.exposesReturnsTwice; - CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; - CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; - - CallerMetrics.NumInsts += CalleeMetrics.NumInsts; - CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; - CallerMetrics.NumCalls += CalleeMetrics.NumCalls; - CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts; - CallerMetrics.NumRets += CalleeMetrics.NumRets; - - // analyzeBasicBlock counts each function argument as an inst. - if (CallerMetrics.NumInsts >= Callee->arg_size()) - CallerMetrics.NumInsts -= Callee->arg_size(); - else - CallerMetrics.NumInsts = 0; - - // We are not updating the argument weights. We have already determined that - // Caller is a fairly large function, so we accept the loss of precision. -} + // Check if there was a reason to force inlining or no inlining. 
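[Aside: a sketch of the decision table implied by the checks below; it restates the code, added here only for clarity.]

    // ShouldInline | Cost vs. Threshold | Result
    // false        | Cost <  Threshold  | InlineCost::getNever(): the analysis
    //              |                    | was vetoed (indirectbr, recursion,
    //              |                    | returns_twice, dynamic alloca)
    // true         | Cost >= Threshold  | InlineCost::getAlways(): inlining was
    //              |                    | forced despite the cost
    // otherwise    |                    | InlineCost::get(Cost, Threshold): a
    //              |                    | variable cost for the caller to weigh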
+  if (!ShouldInline && CA.getCost() < CA.getThreshold())
+    return InlineCost::getNever();
+  if (ShouldInline && CA.getCost() >= CA.getThreshold())
+    return InlineCost::getAlways();

-/// clear - empty the cache of inline costs
-void InlineCostAnalyzer::clear() {
-  CachedFunctionInfo.clear();
+  return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 72e33d1..16e7a72 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -21,6 +21,7 @@
 #include "llvm/GlobalAlias.h"
 #include "llvm/Operator.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
@@ -709,7 +710,7 @@ static Constant *stripAndComputeConstantOffsets(const TargetData &TD,
   Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-      if (!accumulateGEPOffset(TD, GEP, Offset))
+      if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset))
         break;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -1590,6 +1591,45 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
   return 0;
 }

+static Constant *computePointerICmp(const TargetData &TD,
+                                    CmpInst::Predicate Pred,
+                                    Value *LHS, Value *RHS) {
+  // We can only fold certain predicates on pointer comparisons.
+  switch (Pred) {
+  default:
+    return 0;
+
+  // Equality comparisons are easy to fold.
+  case CmpInst::ICMP_EQ:
+  case CmpInst::ICMP_NE:
+    break;
+
+  // We can only handle unsigned relational comparisons because 'inbounds' on
+  // a GEP only protects against unsigned wrapping.
+  case CmpInst::ICMP_UGT:
+  case CmpInst::ICMP_UGE:
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    // However, we have to switch them to their signed variants to handle
+    // negative indices from the base pointer.
+    Pred = ICmpInst::getSignedPredicate(Pred);
+    break;
+  }
+
+  Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+  if (!LHSOffset)
+    return 0;
+  Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+  if (!RHSOffset)
+    return 0;
+
+  // If LHS and RHS are not related via constant offsets to the same base
+  // value, there is nothing we can do here.
+  if (LHS != RHS)
+    return 0;
+
+  return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+}

 /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
 /// fold the result. If not, this returns null.
@@ -2310,7 +2350,12 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getFalse(ITy);
   }

-  // Simplify comparisons of GEPs.
+  // Simplify comparisons of related pointers using a powerful, recursive
+  // GEP-walk when we have target data available.
+  if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy())
+    if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS))
+      return C;
+
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
     if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
       if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
@@ -2818,58 +2863,84 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
   return Result == I ? UndefValue::get(I->getType()) : Result;
 }

-/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
-/// delete the From instruction.
In addition to a basic RAUW, this does a
-/// recursive simplification of the newly formed instructions. This catches
-/// things where one simplification exposes other opportunities. This only
-/// simplifies and deletes scalar operations, it does not change the CFG.
+/// \brief Implementation of recursive simplification through an instruction's
+/// uses.
 ///
-void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
-                                     const TargetData *TD,
-                                     const TargetLibraryInfo *TLI,
-                                     const DominatorTree *DT) {
-  assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
-
-  // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
-  // we can know if it gets deleted out from under us or replaced in a
-  // recursive simplification.
-  WeakVH FromHandle(From);
-  WeakVH ToHandle(To);
-
-  while (!From->use_empty()) {
-    // Update the instruction to use the new value.
-    Use &TheUse = From->use_begin().getUse();
-    Instruction *User = cast<Instruction>(TheUse.getUser());
-    TheUse = To;
-
-    // Check to see if the instruction can be folded due to the operand
-    // replacement. For example changing (or X, Y) into (or X, -1) can replace
-    // the 'or' with -1.
-    Value *SimplifiedVal;
-    {
-      // Sanity check to make sure 'User' doesn't dangle across
-      // SimplifyInstruction.
-      AssertingVH<> UserHandle(User);
-
-      SimplifiedVal = SimplifyInstruction(User, TD, TLI, DT);
-      if (SimplifiedVal == 0) continue;
-    }
+/// This is the common implementation of the recursive simplification routines.
+/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
+/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
+/// instructions to process and attempt to simplify it using
+/// InstructionSimplify.
+///
+/// This routine returns 'true' only when *it* simplifies something. The passed
+/// in simplified value does not count toward this.
+static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
+                                              const TargetData *TD,
+                                              const TargetLibraryInfo *TLI,
+                                              const DominatorTree *DT) {
+  bool Simplified = false;
+  SmallSetVector<Instruction *, 8> Worklist;
+
+  // If we have an explicit value to collapse to, do that round of the
+  // simplification loop by hand initially.
+  if (SimpleV) {
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+         ++UI)
+      if (*UI != I)
+        Worklist.insert(cast<Instruction>(*UI));
+
+    // Replace the instruction with its simplified value.
+    I->replaceAllUsesWith(SimpleV);
+
+    // Gracefully handle edge cases where the instruction is not wired into any
+    // parent block.
+    if (I->getParent())
+      I->eraseFromParent();
+  } else {
+    Worklist.insert(I);
+  }
+
+  // Note that we must test the size on each iteration, the worklist can grow.
+  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+    I = Worklist[Idx];
+
+    // See if this instruction simplifies.
+    SimpleV = SimplifyInstruction(I, TD, TLI, DT);
+    if (!SimpleV)
+      continue;
+
+    Simplified = true;

-    // Recursively simplify this user to the new value.
-    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, TLI, DT);
-    From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
-    To = ToHandle;
+    // Stash away all the uses of the old instruction so we can check them for
+    // recursive simplifications after a RAUW. This is cheaper than checking all
+    // uses of To on the recursive step in most cases.
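[Aside: a small worked trace of the worklist pattern below; the instructions are made up.]

    // Suppose x = add a, 0; y = or x, x; z = xor y, y.
    //   Visit x: it simplifies to a; stash x's user y, RAUW x -> a, erase x.
    //   Visit y: now y = or a, a, which simplifies to a; stash z, RAUW, erase.
    //   Visit z: now z = xor a, a, which simplifies to 0; no users remain.
    // The SmallSetVector guarantees each user is queued at most once even when
    // several RAUWs rediscover it.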
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; + ++UI) + Worklist.insert(cast<Instruction>(*UI)); - assert(ToHandle && "To value deleted by recursive simplification?"); + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); - // If the recursive simplification ended up revisiting and deleting - // 'From' then we're done. - if (From == 0) - return; + // Gracefully handle edge cases where the instruction is not wired into any + // parent block. + if (I->getParent()) + I->eraseFromParent(); } + return Simplified; +} - // If 'From' has value handles referring to it, do a real RAUW to update them. - From->replaceAllUsesWith(To); +bool llvm::recursivelySimplifyInstruction(Instruction *I, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT); +} - From->eraseFromParent(); +bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetData *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!"); + assert(SimpleV && "Must provide a simplified value."); + return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT); } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 971065f..83bdf52 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -416,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, if (Align != 0) { unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD); Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -476,9 +475,8 @@ static bool isZero(Value *V, TargetData *TD) { if (isa<UndefValue>(V)) return true; unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); - APInt Mask = APInt::getAllOnesValue(BitWidth), - KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, TD); return KnownZero.isAllOnesValue(); } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 858cc64..f7a60a1 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -205,6 +205,17 @@ bool Loop::isLoopSimplifyForm() const { return getLoopPreheader() && getLoopLatch() && hasDedicatedExits(); } +/// isSafeToClone - Return true if the loop body is safe to clone in practice. +/// Routines that reform the loop CFG and split edges often fail on indirectbr. +bool Loop::isSafeToClone() const { + // Return false if any loop blocks contain indirectbrs. + for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + if (isa<IndirectBrInst>((*I)->getTerminator())) + return false; + } + return true; +} + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. 
bool Loop::hasDedicatedExits() const { diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 5ba1f40..aba700a 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,10 +14,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" -#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -54,20 +52,6 @@ char PrintLoopPass::ID = 0; } //===----------------------------------------------------------------------===// -// DebugInfoProbe - -static DebugInfoProbeInfo *TheDebugProbe; -static void createDebugInfoProbe() { - if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, - // before static globals, thus it will be destroyed before them. - static ManagedStatic<DebugInfoProbeInfo> DIP; - TheDebugProbe = &*DIP; -} - -//===----------------------------------------------------------------------===// // LPPassManager // @@ -195,7 +179,6 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfo>(); bool Changed = false; - createDebugInfoProbe(); // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); @@ -227,21 +210,19 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); + dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); - if (TheDebugProbe) - TheDebugProbe->initialize(P, F); + { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); Changed |= P->runOnLoop(CurrentLoop, *this); } - if (TheDebugProbe) - TheDebugProbe->finalize(P, F); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 0c0ceeb..205227c 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3261,9 +3261,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones); + ComputeMaskedBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3401,9 +3400,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); + ComputeMaskedBits(U->getValue(), Zeros, Ones, TD); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3660,9 +3658,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // knew about to reconstruct a low-bits mask value. 
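[Aside: an illustrative reading of the comment above; the exact rewrite depends on code beyond this excerpt.]

    // e.g. for (and i32 %x, 255), if ComputeMaskedBits proves the top 24 bits
    // of %x are already zero, the constant covers every bit that can be set,
    // and SCEV can model the 'and' as zext(trunc(%x to i8) to i32) rather
    // than as an opaque bitwise operation.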
unsigned LZ = A.countLeadingZeros(); unsigned BitWidth = A.getBitWidth(); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ); @@ -4619,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Indexes.push_back(0); } + // Loop-invariant loads may be a byproduct of loop optimization. Skip them. + if (!VarIdx) + return getCouldNotCompute(); + // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. // Check to see if X is a loop variant variable value now. const SCEV *Idx = getSCEV(VarIdx); @@ -6845,7 +6846,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { return ProperlyDominatesBlock; case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: + default: llvm_unreachable("Unknown SCEV kind!"); } } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 01e00ca..a430f62 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -20,8 +20,10 @@ #include "llvm/GlobalAlias.h" #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Operator.h" #include "llvm/Target/TargetData.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" @@ -42,7 +44,6 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) { } static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, const TargetData *TD, unsigned Depth) { @@ -52,11 +53,11 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // than C (i.e. no wrap-around can happen). For example, 20-X is // positive if we can prove that X is >= 0 and < 16. if (!CLHS->getValue().isNegative()) { - unsigned BitWidth = Mask.getBitWidth(); + unsigned BitWidth = KnownZero.getBitWidth(); unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - llvm::ComputeMaskedBits(Op1, MaskV, KnownZero2, KnownOne2, TD, Depth+1); + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -64,27 +65,25 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } } - unsigned BitWidth = Mask.getBitWidth(); + unsigned BitWidth = KnownZero.getBitWidth(); // If one of the operands has trailing zeros, then the bits that the // other operand has in those bit positions will be preserved in the // result. For an add, this works with either operand. For a subtract, // this only works if the known zeros are in the right operand. 
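[Aside: a worked i8 example of the trailing-zero rule above.]

    // If Op0 = A << 4 (low four bits known zero) and Op1 = B << 2 (low two
    // bits known zero), then Op0 + Op1 has at least min(4, 2) = 2 known-zero
    // low bits: no carry can reach a position below the lowest possibly-set
    // bit of either operand.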
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - llvm::ComputeMaskedBits(Op0, Mask2, LHSKnownZero, LHSKnownOne, TD, Depth+1); + llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); assert((LHSKnownZero & LHSKnownOne) == 0 && "Bits known to be one AND zero?"); unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - llvm::ComputeMaskedBits(Op1, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); @@ -109,7 +108,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } // Are we still trying to solve for the sign bit? - if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()) { + if (!KnownZero.isNegative() && !KnownOne.isNegative()) { if (NSW) { if (Add) { // Adding two positive numbers can't wrap into negative @@ -131,21 +130,19 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2, APInt &KnownOne2, const TargetData *TD, unsigned Depth) { - unsigned BitWidth = Mask.getBitWidth(); - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1); + unsigned BitWidth = KnownZero.getBitWidth(); + ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); bool isKnownNegative = false; bool isKnownNonNegative = false; // If the multiplication is known not to overflow, compute the sign bit. - if (Mask.isNegative() && NSW) { + if (NSW) { if (Op0 == Op1) { // The product of a number with itself is non-negative. isKnownNonNegative = true; @@ -182,7 +179,6 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - KnownZero &= Mask; // Only make use of no-wrap flags if we failed to compute the sign bit // directly. This matters if the multiplication always overflows, in @@ -195,10 +191,28 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bit sets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. +void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { + unsigned BitWidth = KnownZero.getBitWidth(); + unsigned NumRanges = Ranges.getNumOperands() / 2; + assert(NumRanges >= 1); + + // Use the high end of the ranges to find leading zeros. 
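[Aside: a worked example for the loop below; the metadata values are hypothetical.]

    // An i32 load annotated with the !range pairs [0, 256) and [1024, 4096):
    //   pair one: Upper - 1 = 255  has 24 leading zeros;
    //   pair two: Upper - 1 = 4095 has 20 leading zeros.
    // MinLeadingZeros = 20, so KnownZero = 0xFFFFF000: every value the load
    // can produce fits in its low 12 bits.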
+  unsigned MinLeadingZeros = BitWidth;
+  for (unsigned i = 0; i < NumRanges; ++i) {
+    ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
+    ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+    ConstantRange Range(Lower->getValue(), Upper->getValue());
+    if (Range.isWrappedSet())
+      MinLeadingZeros = 0; // -1 has no zeros
+    unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
+    MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
+  }
+
+  KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+}
+/// ComputeMaskedBits - Determine which of the bits are known to be either zero
+/// or one and return them in the KnownZero/KnownOne bit sets.
+///
 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
 /// we cannot optimize based on the assumption that it is zero without changing
 /// it to be an explicit zero. If we don't change it to zero, other code could
@@ -208,15 +222,15 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
 ///
 /// This function is defined on values with integer type, values with pointer
 /// type (but only if TD is non-null), and vectors of integers. In the case
-/// where V is a vector, the mask, known zero, and known one values are the
+/// where V is a vector, the known zero and known one values are the
 /// same width as the vector element, and the bit is set only if it is true
 /// for all of the elements in the vector.
-void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
-                             APInt &KnownZero, APInt &KnownOne,
+void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
                              const TargetData *TD, unsigned Depth) {
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");
-  unsigned BitWidth = Mask.getBitWidth();
+  unsigned BitWidth = KnownZero.getBitWidth();
+
   assert((V->getType()->isIntOrIntVectorTy() ||
           V->getType()->getScalarType()->isPointerTy()) &&
          "Not integer or pointer type!");
@@ -230,15 +244,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,

   if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
     // We know all of the bits for a constant!
-    KnownOne = CI->getValue() & Mask;
-    KnownZero = ~KnownOne & Mask;
+    KnownOne = CI->getValue();
+    KnownZero = ~KnownOne;
     return;
   }
   // Null and aggregate-zero are all-zeros.
   if (isa<ConstantPointerNull>(V) ||
       isa<ConstantAggregateZero>(V)) {
     KnownOne.clearAllBits();
-    KnownZero = Mask;
+    KnownZero = APInt::getAllOnesValue(BitWidth);
     return;
   }
   // Handle a constant vector by taking the intersection of the known bits of
@@ -275,8 +289,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
       }
     }
     if (Align > 0)
-      KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
-                                              CountTrailingZeros_32(Align));
+      KnownZero = APInt::getLowBitsSet(BitWidth,
+                                       CountTrailingZeros_32(Align));
     else
       KnownZero.clearAllBits();
     KnownOne.clearAllBits();
@@ -288,8 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     if (GA->mayBeOverridden()) {
       KnownZero.clearAllBits(); KnownOne.clearAllBits();
     } else {
-      ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
-                        TD, Depth+1);
+      ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
     }
     return;
   }
@@ -298,15 +311,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     // Get alignment information off byval arguments if specified in the IR.
if (A->hasByValAttr()) if (unsigned Align = A->getParamAlignment()) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); return; } // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (Depth == MaxDepth || Mask == 0) + if (Depth == MaxDepth) return; // Limit search depth. Operator *I = dyn_cast<Operator>(V); @@ -315,12 +328,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt KnownZero2(KnownZero), KnownOne2(KnownOne); switch (I->getOpcode()) { default: break; + case Instruction::Load: + if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) + computeMaskedBitsLoad(*MD, KnownZero); + return; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownZero); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -331,10 +346,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - APInt Mask2(Mask & ~KnownOne); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -345,9 +358,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, return; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -361,34 +373,30 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, case Instruction::Mul: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, - Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, - TD, Depth); + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } case Instruction::UDiv: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. 
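[Aside: a worked i8 example of the conservative udiv rule above.]

    // If the numerator is known to be < 32 (three leading zero bits) and the
    // denominator has a known-one bit at bit 4 (so it is at least 16), the
    // udiv acts like a right shift by at least 4:
    //   LeadZ = min(8, 3 + 8 - 3 - 1) = 7,
    // i.e. the quotient is at most 1, matching 31 / 16 = 1.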
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), - AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -421,11 +429,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, else SrcBitWidth = SrcTy->getScalarSizeInBits(); - APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. @@ -439,8 +445,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); return; } break; @@ -449,11 +454,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Compute the bits in the result that are not present in the input. unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt MaskIn = Mask.trunc(SrcBitWidth); KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -470,9 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - APInt Mask2(Mask.lshr(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; @@ -487,9 +488,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. 
- APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -505,9 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - APInt Mask2(Mask.shl(ShiftAmt)); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -523,15 +520,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, case Instruction::Sub: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, - Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, - TD, Depth); + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); break; } case Instruction::Add: { bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap(); ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, - Mask, KnownZero, KnownOne, KnownZero2, KnownOne2, - TD, Depth); + KnownZero, KnownOne, KnownZero2, KnownOne2, TD, + Depth); break; } case Instruction::SRem: @@ -539,9 +536,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, - Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -557,19 +552,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. - if (Mask.isNegative() && KnownZero.isNonNegative()) { - APInt Mask2 = APInt::getSignBit(BitWidth); + if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) @@ -582,27 +573,24 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero |= ~LowBits; + KnownOne &= LowBits; break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. 
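[Aside: a worked example for the remainder rule above.]

    // i8: if the LHS is known < 64 (two leading zeros) and the RHS is known
    // < 16 (four leading zeros), the remainder is both <= the LHS and < the
    // RHS, so it inherits the larger count: at least four leading zeros,
    // i.e. a value below 16.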
- APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, - TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, - TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); break; } @@ -613,17 +601,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) - KnownZero = Mask & APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); break; } case Instruction::GetElementPtr: { // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. - APInt LocalMask = APInt::getAllOnesValue(BitWidth); APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -643,17 +629,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (!IndexedTy->isSized()) return; unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; - LocalMask = APInt::getAllOnesValue(GEPOpiBits); LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalMask, - LocalKnownZero, LocalKnownOne, TD, Depth+1); + ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(CountTrailingZeros_64(TypeSize) + LocalKnownZero.countTrailingOnes())); } } - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask; + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); break; } case Instruction::PHI: { @@ -688,17 +672,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1); - Mask2 = APInt::getLowBitsSet(BitWidth, - KnownZero2.countTrailingOnes()); + ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1); + ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); - KnownZero = Mask & - APInt::getLowBitsSet(BitWidth, + KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), KnownZero3.countTrailingOnes())); break; @@ -717,8 +697,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, if (P->hasConstantValue() == P) break; - KnownZero = Mask; - KnownOne = Mask; + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getAllOnesValue(BitWidth); for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { // Skip direct self references. 
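[Aside: an illustrative example of the incoming-value intersection below; the self-reference skip that follows avoids the degenerate cycle.]

    // e.g. %x = phi i8 [ 16, %a ], [ 32, %b ]: each incoming constant has
    // fully known bits, and intersecting them keeps only what both agree on.
    // Bits 0-3 and 6-7 are zero in both, so %x is known to be a multiple of
    // 16 that is at most 48.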
if (P->getIncomingValue(i) == P) continue; @@ -727,8 +707,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne, - KnownZero2, KnownOne2, TD, MaxDepth-1); + ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -749,17 +729,17 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // If this call is undefined for 0, the result will be less than 2^n. if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext())) LowBits -= 1; - KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::ctpop: { unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = Mask & APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: - KnownZero = Mask & APInt::getHighBitsSet(64, 32); + KnownZero = APInt::getHighBitsSet(64, 32); break; } } @@ -774,21 +754,19 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: ComputeMaskedBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, Mask, - KnownZero, KnownOne, KnownZero2, KnownOne2, - TD, Depth); + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: ComputeMaskedBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, Mask, - KnownZero, KnownOne, KnownZero2, KnownOne2, - TD, Depth); + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, Mask, KnownZero, KnownOne, + false, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } @@ -809,8 +787,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD, - Depth); + ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -918,7 +895,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -960,12 +937,12 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. 
- ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } @@ -995,8 +972,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne, - TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -1012,7 +988,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const TargetData *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1103,13 +1079,11 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD, - Depth+1); + ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If we are subtracting one from a positive number, there is no carry @@ -1130,12 +1104,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne, - TD, Depth+1); + ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(TyBits, 1)) == Mask) + if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; // If the input is known to be positive (the sign bit is known clear), @@ -1177,8 +1149,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. 
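[Aside: a worked example of the known-bits fallback below.]

    // i16: if known bits prove the top nine bits are zero (say the value was
    // zero-extended from i7), those nine identical top bits are all sign
    // bits, so ComputeNumSignBits can report at least 9 here.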
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - APInt Mask = APInt::getAllOnesValue(TyBits); - ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth); + APInt Mask; + ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1870,8 +1842,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(Op, APInt::getAllOnesValue(BitWidth), - KnownZero, KnownOne, TD); + ComputeMaskedBits(Op, KnownZero, KnownOne, TD); return !!KnownZero; } case Instruction::Load: { @@ -1883,6 +1854,14 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { switch (II->getIntrinsicID()) { + // These synthetic intrinsics have no side-effects, and just mark + // information about their operands. + // FIXME: There are other no-op synthetic instructions that potentially + // should be considered at least *safe* to speculate... + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return true; + case Intrinsic::bswap: case Intrinsic::ctlz: case Intrinsic::ctpop: diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index a78c0f5..68873e2 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -16,6 +16,7 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Module.h" +#include <cstdio> #include <cstdlib> #include <memory> using namespace llvm; diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 14e14c3..00874d4 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -290,7 +290,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, } bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - const TargetLowering &TLI) { + SDValue &Chain, const TargetLowering &TLI) { const Function *F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of @@ -304,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, return false; // Check if the only use is a function return node. 
- return TLI.isUsedByReturnOnly(Node); + return TLI.isUsedByReturnOnly(Node, Chain); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index dd3fb3b..f6cde98 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1781,7 +1781,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' + << " (" << format("0x%x", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); @@ -1793,7 +1795,9 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, if (CFP->getType()->isDoubleTy()) { if (AP.isVerbose()) { double Val = CFP->getValueAPF().convertToDouble(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'; + uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' + << " (" << format("0x%lx", IntVal) << ")\n"; } uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 89e6cd1..e9e9335 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -329,7 +329,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; } - if (OpNo >= MI->getNumOperands()) { + // We may have a location metadata attached to the end of the + // instruction, and should never see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { Error = true; } else { unsigned OpFlags = MI->getOperand(OpNo).getImm(); diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 644eaad..454a923 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -11,14 +11,16 @@ // //===----------------------------------------------------------------------===// +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" -#include "DwarfAccelTable.h" -#include "DwarfDebug.h" -#include "DIE.h" using namespace llvm; @@ -34,44 +36,28 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { llvm_unreachable("invalid AtomType!"); } -// The general case would need to have a less hard coded size for the -// length of the HeaderData, however, if we're constructing based on a -// single Atom then we know it will always be: 4 + 4 + 2 + 2. -DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : - Header(12), - HeaderData(atom) { -} - // The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) : +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) : Header(8 + (atomList.size() * 4)), - HeaderData(atomList) { -} + HeaderData(atomList), + Entries(Allocator) { } -DwarfAccelTable::~DwarfAccelTable() { - for (size_t i = 0, e = Data.size(); i < e; ++i) - delete Data[i]; - for (StringMap<DataArray>::iterator - EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) - for (DataArray::iterator DI = EI->second.begin(), - DE = EI->second.end(); DI != DE; ++DI) - delete (*DI); -} +DwarfAccelTable::~DwarfAccelTable() { } void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) { + assert(Data.empty() && "Already finalized!"); // If the string is in the list already then add this die to the list // otherwise add a new one. DataArray &DIEs = Entries[Name]; - DIEs.push_back(new HashDataContents(die, Flags)); + DIEs.push_back(new (Allocator) HashDataContents(die, Flags)); } void DwarfAccelTable::ComputeBucketCount(void) { // First get the number of unique hashes. - std::vector<uint32_t> uniques; - uniques.resize(Data.size()); + std::vector<uint32_t> uniques(Data.size()); for (size_t i = 0, e = Data.size(); i < e; ++i) uniques[i] = Data[i]->HashValue; - std::stable_sort(uniques.begin(), uniques.end()); + array_pod_sort(uniques.begin(), uniques.end()); std::vector<uint32_t>::iterator p = std::unique(uniques.begin(), uniques.end()); uint32_t num = std::distance(uniques.begin(), p); @@ -84,31 +70,23 @@ void DwarfAccelTable::ComputeBucketCount(void) { Header.hashes_count = num; } -namespace { - // DIESorter - comparison predicate that sorts DIEs by their offset. - struct DIESorter { - bool operator()(const struct DwarfAccelTable::HashDataContents *A, - const struct DwarfAccelTable::HashDataContents *B) const { - return A->Die->getOffset() < B->Die->getOffset(); - } - }; +// compareDIEs - comparison predicate that sorts DIEs by their offset. +static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, + const DwarfAccelTable::HashDataContents *B) { + return A->Die->getOffset() < B->Die->getOffset(); } void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Create the individual hash data outputs. for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { - struct HashData *Entry = new HashData((*EI).getKeyData()); // Unique the entries. 
- std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter()); + std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs); EI->second.erase(std::unique(EI->second.begin(), EI->second.end()), EI->second.end()); - for (DataArray::const_iterator DI = EI->second.begin(), - DE = EI->second.end(); - DI != DE; ++DI) - Entry->addData((*DI)); + HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); Data.push_back(Entry); } @@ -215,7 +193,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { D->getStringPool()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); - for (std::vector<struct HashDataContents*>::const_iterator + for (ArrayRef<HashDataContents*>::const_iterator DI = (*HI)->Data.begin(), DE = (*HI)->Data.end(); DI != DE; ++DI) { // Emit the DIE offset diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 2278d4c..963b8cd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -15,6 +15,7 @@ #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/DataTypes.h" @@ -164,22 +165,12 @@ public: private: struct TableHeaderData { - uint32_t die_offset_base; - std::vector<Atom> Atoms; + SmallVector<Atom, 1> Atoms; + + TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { } - TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList, - uint32_t offset = 0) : - die_offset_base(offset) { - for (size_t i = 0, e = AtomList.size(); i != e; ++i) - Atoms.push_back(AtomList[i]); - } - - TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) - : die_offset_base(offset) { - Atoms.push_back(Atom); - } - #ifndef NDEBUG void print (raw_ostream &O) { O << "die_offset_base: " << die_offset_base << "\n"; @@ -221,11 +212,11 @@ private: StringRef Str; uint32_t HashValue; MCSymbol *Sym; - std::vector<struct HashDataContents*> Data; // offsets - HashData(StringRef S) : Str(S) { + ArrayRef<HashDataContents*> Data; // offsets + HashData(StringRef S, ArrayRef<HashDataContents*> Data) + : Str(S), Data(Data) { HashValue = DwarfAccelTable::HashDJB(S); } - void addData(struct HashDataContents *Datum) { Data.push_back(Datum); } #ifndef NDEBUG void print(raw_ostream &O) { O << "Name: " << Str << "\n"; @@ -255,15 +246,18 @@ private: void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); void EmitData(AsmPrinter *, DwarfDebug *D); - + + // Allocator for HashData and HashDataContents. 
+ BumpPtrAllocator Allocator; + // Output Variables TableHeader Header; TableHeaderData HeaderData; std::vector<HashData*> Data; // String Data - typedef std::vector<struct HashDataContents*> DataArray; - typedef StringMap<DataArray> StringEntries; + typedef std::vector<HashDataContents*> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets @@ -274,8 +268,7 @@ private: // Public Implementation public: - DwarfAccelTable(DwarfAccelTable::Atom); - DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &); + DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); void AddName(StringRef, DIE*, char = 0); void FinalizeTable(AsmPrinter *, const char *); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 3b383f6..cc5b642 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -188,6 +188,24 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { /// addSourceLine - Add location information to specified debug information /// entry. +void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0) + return; + DIFile File = Ty.getFile(); + unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), + File.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { // Verify namespace. if (!NS.Verify()) @@ -628,7 +646,8 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty) { +void CompileUnit::addType(DIE *Entity, DIType Ty, + unsigned Attribute) { if (!Ty.Verify()) return; @@ -636,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); return; } @@ -646,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); // If this is a complete composite type then include it in the // list of global types. 
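The reworked addType above takes the DWARF attribute as a parameter (defaulting to DW_AT_type) so that the new DW_TAG_friend handling in the next hunk can emit a DW_AT_friend reference through the same path. A minimal standalone sketch of that shape, with toy stand-ins for the DIE and attribute machinery (the real LLVM classes are richer):

    #include <map>

    // Attribute codes as in the DWARF spec; only these two are modeled here.
    enum : unsigned { DW_AT_friend = 0x41, DW_AT_type = 0x49 };

    struct DIE {
      std::map<unsigned, const DIE *> Refs; // attribute -> referenced DIE
    };

    // The default argument keeps existing DW_AT_type callers unchanged,
    // while the friend path passes DW_AT_friend explicitly.
    void addType(DIE &Entity, const DIE &Ty, unsigned Attribute = DW_AT_type) {
      Entity.Refs[Attribute] = &Ty;
    }

    int main() {
      DIE MemberDie, FriendDie, SomeType;
      addType(MemberDie, SomeType);               // ordinary type reference
      addType(FriendDie, SomeType, DW_AT_friend); // friend reference
      return 0;
    }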
@@ -826,13 +845,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else if (Element.isObjCProperty()) { + } else if (Element.isDerivedType()) { + DIDerivedType DDTy(Element); + if (DDTy.getTag() == dwarf::DW_TAG_friend) { + ElemDie = new DIE(dwarf::DW_TAG_friend); + addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); + } else + ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); ElemDie = new DIE(Property.getTag()); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + addType(ElemDie, Property.getType()); + addSourceLine(ElemDie, Property); StringRef GetterName = Property.getObjCPropertyGetterName(); if (!GetterName.empty()) addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); @@ -1006,9 +1032,10 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here to work around - // a bug in older gdbs that requires the linkage name to resolve - // multiple template functions. + // Unfortunately this code needs to stay here instead of below the + // AT_specification code in order to work around a bug in older + // gdbs that requires the linkage name to resolve multiple template + // functions. StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 4e63c3f..45e407e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -213,6 +213,7 @@ public: void addSourceLine(DIE *Die, DISubprogram SP); void addSourceLine(DIE *Die, DIType Ty); void addSourceLine(DIE *Die, DINameSpace NS); + void addSourceLine(DIE *Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. @@ -260,8 +261,10 @@ public: /// addToContextOwner - Add Die into the list of its context owner's children. void addToContextOwner(DIE *Die, DIDescriptor Context); - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. This takes + /// an attribute parameter because DW_AT_friend attributes are also + /// type references. + void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 388cef4..cb78878 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -19,6 +19,7 @@ #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -133,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; + + // Turn on accelerator tables for Darwin. + if (Triple(M->getTargetTriple()).isOSDarwin()) + DwarfAccelTables = true; + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); @@ -438,7 +444,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed @@ -554,9 +561,9 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); - // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This - // simplifies debug range entries. - NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset()) @@ -1086,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { if (!MI->isDebugValue()) { DebugLoc DL = MI->getDebugLoc(); if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { - unsigned Flags = DWARF2_FLAG_IS_STMT; + unsigned Flags = 0; PrevInstLoc = DL; if (DL == PrologEndLoc) { Flags |= DWARF2_FLAG_PROLOGUE_END; PrologEndLoc = DebugLoc(); } + if (PrologEndLoc.isUnknown()) + Flags |= DWARF2_FLAG_IS_STMT; + if (!DL.isUnknown()) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); @@ -1186,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { } /// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +/// line number info for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) - return DebugLoc::get(SP.getLineNumber(), 0, SP); + if (SP.Verify()) { + // Check for number of operands since the compatibility is + // cheap here. 
+ if (SP->getNumOperands() > 19) + return DebugLoc::get(SP.getScopeLineNumber(), 0, SP); + else + return DebugLoc::get(SP.getLineNumber(), 0, SP); + } + return DebugLoc(); } @@ -1364,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - DWARF2_FLAG_IS_STMT); + 0); } } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f57f4a8..ef1d2ba 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -183,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; - - RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL; + RS = NULL; + + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d596d8b..d5ea666 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,12 +14,12 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -655,7 +655,7 @@ void InlineSpiller::analyzeSiblingValues() { if (OrigVNI->def != VNI->def) DefMI = traceSiblingValue(Reg, VNI, OrigVNI); } - if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def << " may remat from " << *DefMI); } @@ -856,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); if (SibI != SibValues.end()) RM.OrigMI = SibI->second.DefMI; - if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + if (!Edit->canRematerializeAt(RM, UseIdx, false)) { markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; @@ -883,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Original); NewLI.markNotSpillable(); // Finally we can rematerialize OrigMI before MI. SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - LIS, TII, TRI); + TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -913,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, /// and trim the live ranges after. 
void InlineSpiller::reMaterializeAll() { // analyzeSiblingValues has already tested all relevant defining instructions. - if (!Edit->anyRematerializable(LIS, TII, AA)) + if (!Edit->anyRematerializable(AA)) return; UsedValues.clear(); @@ -954,7 +954,7 @@ void InlineSpiller::reMaterializeAll() { if (DeadDefs.empty()) return; DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); // Get rid of deleted and empty intervals. for (unsigned i = RegsToSpill.size(); i != 0; --i) { @@ -966,7 +966,7 @@ void InlineSpiller::reMaterializeAll() { LiveInterval &LI = LIS.getInterval(Reg); if (!LI.empty()) continue; - Edit->eraseVirtReg(Reg, LIS); + Edit->eraseVirtReg(Reg); RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); } DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); @@ -1181,7 +1181,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); + LiveInterval &NewLI = Edit->createFrom(Reg); NewLI.markNotSpillable(); if (RI.Reads) @@ -1244,7 +1244,7 @@ void InlineSpiller::spillAll() { // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); - Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII, RegsToSpill); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); } // Finally delete the SnippetCopies. @@ -1260,7 +1260,7 @@ void InlineSpiller::spillAll() { // Delete all spilled registers. for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i], LIS); + Edit->eraseVirtReg(RegsToSpill[i]); } void InlineSpiller::spill(LiveRangeEdit &edit) { @@ -1289,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, LIS, Loops); + Edit->calculateRegClassAndHint(MF, Loops); } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 97e6547..a1f479a 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -172,6 +172,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, + *getInstrInfo(), Context->getRegisterInfo(), STI); // Create a code emitter if asked to show the encoding. 
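The InlineSpiller hunks above keep deleting LIS, VRM, and TII arguments at each call site; the corresponding LiveRangeEdit change further down moves those shared analyses into the object at construction time. A toy sketch of that refactoring pattern, using illustrative stand-ins rather than the LLVM types:

    #include <iostream>

    struct LiveIntervals { /* analysis results */ };
    struct TargetInstrInfo { /* target hooks */ };

    // Before the change, every method took the shared analyses as extra
    // parameters; after it, the editor captures them once.
    class LiveRangeEditLike {
      LiveIntervals &LIS;
      const TargetInstrInfo &TII;
    public:
      LiveRangeEditLike(LiveIntervals &LIS, const TargetInstrInfo &TII)
          : LIS(LIS), TII(TII) {}
      void eraseVirtReg(unsigned Reg) {
        (void)LIS; // would consult the captured analysis here
        std::cout << "erasing vreg " << Reg << "\n";
      }
    };

    int main() {
      LiveIntervals LIS;
      TargetInstrInfo TII;
      LiveRangeEditLike Edit(LIS, TII);
      Edit.eraseVirtReg(42); // call sites no longer thread LIS through
    }

The payoff is visible throughout the diff: every Edit-> call shrinks, and the risk of passing mismatched analyses to different calls on the same edit disappears.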
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3ade660..934cc12 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1068,9 +1068,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness."); #endif @@ -1115,9 +1115,9 @@ public: #ifndef NDEBUG LIValidator validator; - std::for_each(Entering.begin(), Entering.end(), validator); - std::for_each(Internal.begin(), Internal.end(), validator); - std::for_each(Exiting.begin(), Exiting.end(), validator); + validator = std::for_each(Entering.begin(), Entering.end(), validator); + validator = std::for_each(Internal.begin(), Internal.end(), validator); + validator = std::for_each(Exiting.begin(), Exiting.end(), validator); assert(validator.rangesOk() && "moveAllOperandsInto broke liveness."); #endif } diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index f9b93d5..695f536 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" @@ -31,13 +31,12 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, - LiveIntervals &LIS, - VirtRegMap &VRM) { - MachineRegisterInfo &MRI = VRM.getRegInfo(); +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - VRM.grow(); - VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + if (VRM) { + VRM->grow(); + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } LiveInterval &LI = LIS.getOrCreateInterval(VReg); newRegs_.push_back(&LI); return LI; @@ -45,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo &tii, AliasAnalysis *aa) { assert(DefMI && "Missing instruction"); scannedRemattable_ = true; - if (!tii.isTriviallyReMaterializable(DefMI, aa)) + if (!TII.isTriviallyReMaterializable(DefMI, aa)) return false; remattable_.insert(VNI); return true; } -void LiveRangeEdit::scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { for (LiveInterval::vni_iterator I = parent_.vni_begin(), E = parent_.vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - checkRematerializable(VNI, 
DefMI, tii, aa); + checkRematerializable(VNI, DefMI, aa); } scannedRemattable_ = true; } -bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa) { +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { if (!scannedRemattable_) - scanRemattable(lis, tii, aa); + scanRemattable(aa); return !remattable_.empty(); } @@ -83,8 +77,7 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis, /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, - LiveIntervals &lis) { + SlotIndex UseIdx) { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { @@ -92,10 +85,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. - if (MO.isUndef() || !lis.hasInterval(MO.getReg())) + if (MO.isUndef() || !LIS.hasInterval(MO.getReg())) continue; - LiveInterval &li = lis.getInterval(MO.getReg()); + LiveInterval &li = LIS.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); if (!OVNI) continue; @@ -107,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, bool LiveRangeEdit::canRematerializeAt(Remat &RM, SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis) { + bool cheapAsAMove) { assert(scannedRemattable_ && "Call anyRematerializable first"); // Use scanRemattable info. @@ -118,10 +110,10 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, // No defining instruction provided. SlotIndex DefIdx; if (RM.OrigMI) - DefIdx = lis.getInstructionIndex(RM.OrigMI); + DefIdx = LIS.getInstructionIndex(RM.OrigMI); else { DefIdx = RM.ParentVNI->def; - RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); assert(RM.OrigMI && "No defining instruction for remattable value"); } @@ -130,7 +122,7 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, return false; // Verify that all used registers are available with the same values. - if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) return false; return true; @@ -140,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, const Remat &RM, - LiveIntervals &lis, - const TargetInstrInfo &tii, const TargetRegisterInfo &tri, bool Late) { assert(RM.OrigMI && "Invalid remat"); - tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) .getRegSlot(); } -void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) LIS.removeInterval(Reg); } bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, - SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo &MRI, - LiveIntervals &LIS, - const TargetInstrInfo &TII) { + SmallVectorImpl<MachineInstr*> &Dead) { MachineInstr *DefMI = 0, *UseMI = 0; // Check that there is a single def and a single use. 
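allUsesAvailableAt above is the safety check for rematerialization: every register the defining instruction reads must hold the same value at the original definition point and at the proposed new use point. A simplified self-contained model of that check, with integer value numbers standing in for VNInfo and a nested map standing in for LiveInterval lookups:

    #include <map>
    #include <vector>

    using SlotIndex = unsigned;
    using ValueId = int;
    // reg -> (slot index -> value number live there); a crude stand-in
    // for LiveIntervals / LiveInterval::getVNInfoAt.
    using LiveMap = std::map<unsigned, std::map<SlotIndex, ValueId>>;

    bool allUsesAvailableAt(const std::vector<unsigned> &UsedRegs,
                            SlotIndex OrigIdx, SlotIndex UseIdx,
                            const LiveMap &Live) {
      for (unsigned Reg : UsedRegs) {
        auto LI = Live.find(Reg);
        if (LI == Live.end())
          continue; // untracked (e.g. reserved) registers are OK
        auto AtOrig = LI->second.find(OrigIdx);
        if (AtOrig == LI->second.end())
          continue;
        auto AtUse = LI->second.find(UseIdx);
        // Remat is only safe if the same value is live at both points.
        if (AtUse == LI->second.end() || AtUse->second != AtOrig->second)
          return false;
      }
      return true;
    }

    int main() {
      LiveMap Live;
      Live[1][10] = 7; // vreg 1 holds value 7 at the original def
      Live[1][20] = 7; // ...and the same value at the new use point
      return allUsesAvailableAt({1}, 10, 20, Live) ? 0 : 1;
    }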
@@ -206,13 +193,10 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, } void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals &LIS, VirtRegMap &VRM, - const TargetInstrInfo &TII, ArrayRef<unsigned> RegsBeingSpilled) { SetVector<LiveInterval*, SmallVector<LiveInterval*, 8>, SmallPtrSet<LiveInterval*, 8> > ToShrink; - MachineRegisterInfo &MRI = VRM.getRegInfo(); for (;;) { // Erase all dead defs. @@ -263,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, LI.removeValNo(VNI); if (LI.empty()) { ToShrink.remove(&LI); - eraseVirtReg(Reg, LIS); + eraseVirtReg(Reg); } } } @@ -282,7 +266,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Shrink just one live interval. Then delete new dead defs. LiveInterval *LI = ToShrink.back(); ToShrink.pop_back(); - if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + if (foldAsLoad(LI, Dead)) continue; if (delegate_) delegate_->LRE_WillShrinkVirtReg(LI->reg); @@ -302,7 +286,6 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } if (BeingSpilled) continue; - // LI may have been separated, create new intervals. LI->RenumberValues(LIS); @@ -311,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (NumComp <= 1) continue; ++NumFracRanges; - bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg; + bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector<LiveInterval*, 8> Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + Dups.push_back(&createFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. if (IsOriginal) - VRM.setIsSplitFromReg(Dups.back()->reg, 0); + VRM->setIsSplitFromReg(Dups.back()->reg, 0); if (delegate_) delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); } @@ -329,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - LiveIntervals &LIS, const MachineLoopInfo &Loops) { VirtRegAuxInfo VRAI(MF, LIS, Loops); - MachineRegisterInfo &MRI = MF.getRegInfo(); for (iterator I = begin(), E = end(); I != E; ++I) { LiveInterval &LI = **I; if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h deleted file mode 100644 index 1148025..0000000 --- a/lib/CodeGen/LiveRangeEdit.h +++ /dev/null @@ -1,201 +0,0 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The LiveRangeEdit class represents changes done to a virtual register when it -// is spilled or split. -// -// The parent register is never changed. Instead, a number of new virtual -// registers are created and added to the newRegs vector. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H -#define LLVM_CODEGEN_LIVERANGEEDIT_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/CodeGen/LiveInterval.h" - -namespace llvm { - -class AliasAnalysis; -class LiveIntervals; -class MachineLoopInfo; -class MachineRegisterInfo; -class VirtRegMap; - -class LiveRangeEdit { -public: - /// Callback methods for LiveRangeEdit owners. - class Delegate { - virtual void anchor(); - public: - /// Called immediately before erasing a dead machine instruction. - virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} - - /// Called when a virtual register is no longer used. Return false to defer - /// its deletion from LiveIntervals. - virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } - - /// Called before shrinking the live range of a virtual register. - virtual void LRE_WillShrinkVirtReg(unsigned) {} - - /// Called after cloning a virtual register. - /// This is used for new registers representing connected components of Old. - virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} - }; - -private: - LiveInterval &parent_; - SmallVectorImpl<LiveInterval*> &newRegs_; - Delegate *const delegate_; - - /// firstNew_ - Index of the first register added to newRegs_. - const unsigned firstNew_; - - /// scannedRemattable_ - true when remattable values have been identified. - bool scannedRemattable_; - - /// remattable_ - Values defined by remattable instructions as identified by - /// tii.isTriviallyReMaterializable(). - SmallPtrSet<const VNInfo*,4> remattable_; - - /// rematted_ - Values that were actually rematted, and so need to have their - /// live range trimmed or entirely removed. - SmallPtrSet<const VNInfo*,4> rematted_; - - /// scanRemattable - Identify the parent_ values that may rematerialize. - void scanRemattable(LiveIntervals &lis, - const TargetInstrInfo &tii, - AliasAnalysis *aa); - - /// allUsesAvailableAt - Return true if all registers used by OrigMI at - /// OrigIdx are also available with the same value at UseIdx. - bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx, LiveIntervals &lis); - - /// foldAsLoad - If LI has a single use and a single def that can be folded as - /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead, - MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); - -public: - /// Create a LiveRangeEdit for breaking down parent into smaller pieces. - /// @param parent The register being spilled or split. - /// @param newRegs List to receive any new registers created. This needn't be - /// empty initially, any existing registers are ignored. - LiveRangeEdit(LiveInterval &parent, - SmallVectorImpl<LiveInterval*> &newRegs, - Delegate *delegate = 0) - : parent_(parent), newRegs_(newRegs), - delegate_(delegate), - firstNew_(newRegs.size()), - scannedRemattable_(false) {} - - LiveInterval &getParent() const { return parent_; } - unsigned getReg() const { return parent_.reg; } - - /// Iterator for accessing the new registers added by this edit. 
- typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator; - iterator begin() const { return newRegs_.begin()+firstNew_; } - iterator end() const { return newRegs_.end(); } - unsigned size() const { return newRegs_.size()-firstNew_; } - bool empty() const { return size() == 0; } - LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - - ArrayRef<LiveInterval*> regs() const { - return makeArrayRef(newRegs_).slice(firstNew_); - } - - /// createFrom - Create a new virtual register based on OldReg. - LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); - - /// create - Create a new register with the same class and original slot as - /// parent. - LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { - return createFrom(getReg(), LIS, VRM); - } - - /// anyRematerializable - Return true if any parent values may be - /// rematerializable. - /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, - AliasAnalysis*); - - /// checkRematerializable - Manually add VNI to the list of rematerializable - /// values if DefMI may be rematerializable. - bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - const TargetInstrInfo&, AliasAnalysis*); - - /// Remat - Information needed to rematerialize at a specific location. - struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining ParentVNI. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {} - }; - - /// canRematerializeAt - Determine if ParentVNI can be rematerialized at - /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI. - /// When cheapAsAMove is set, only cheap remats are allowed. - bool canRematerializeAt(Remat &RM, - SlotIndex UseIdx, - bool cheapAsAMove, - LiveIntervals &lis); - - /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an - /// instruction into MBB before MI. The new instruction is mapped, but - /// liveness is not updated. - /// Return the SlotIndex of the new instruction. - SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - LiveIntervals&, - const TargetInstrInfo&, - const TargetRegisterInfo&, - bool Late = false); - - /// markRematerialized - explicitly mark a value as rematerialized after doing - /// it manually. - void markRematerialized(const VNInfo *ParentVNI) { - rematted_.insert(ParentVNI); - } - - /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(const VNInfo *ParentVNI) const { - return rematted_.count(ParentVNI); - } - - /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try - /// to erase it from LIS. - void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); - - /// eliminateDeadDefs - Try to delete machine instructions that are now dead - /// (allDefsAreDead returns true). This may cause live intervals to be trimmed - /// and further dead efs to be eliminated. - /// RegsBeingSpilled lists registers currently being spilled by the register - /// allocator. These registers should not be split into new intervals - /// as currently those new intervals are not guaranteed to spill. 
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, - LiveIntervals&, VirtRegMap&, - const TargetInstrInfo&, - ArrayRef<unsigned> RegsBeingSpilled - = ArrayRef<unsigned>()); - - /// calculateRegClassAndHint - Recompute register class and hint for each new - /// register. - void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, - const MachineLoopInfo&); -}; - -} - -#endif diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 48e1e4c..5a0d97d 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -14,7 +14,7 @@ // the instruction, but are never used after the instruction (i.e., they are // killed). // -// This class computes live variables using are sparse implementation based on +// This class computes live variables using a sparse implementation based on // the machine code SSA form. This class computes live variable information for // each virtual and _register allocatable_ physical register in a function. It // uses the dominance properties of SSA form to efficiently compute live diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ca8a8e8..1abb8f2 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -321,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { void MachineBasicBlock::removeLiveIn(unsigned Reg) { std::vector<unsigned>::iterator I = std::find(LiveIns.begin(), LiveIns.end(), Reg); - assert(I != LiveIns.end() && "Not a live in!"); - LiveIns.erase(I); + if (I != LiveIns.end()) + LiveIns.erase(I); } bool MachineBasicBlock::isLiveIn(unsigned Reg) const { @@ -392,22 +392,44 @@ void MachineBasicBlock::updateTerminator() { TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { + // Walk through the successors and find the successor which is not + // a landing pad and is not the conditional branch destination (in TBB) + // as the fallthrough successor. + MachineBasicBlock *FallthroughBB = 0; + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad() || *SI == TBB) + continue; + assert(!FallthroughBB && "Found more than one fallthrough successor."); + FallthroughBB = *SI; + } + if (!FallthroughBB && canFallThrough()) { + // We fallthrough to the same basic block as the conditional jump + // targets. Remove the conditional jump, leaving unconditional + // fallthrough. + // FIXME: This does not seem like a reasonable pattern to support, but it + // has been seen in the wild coming out of degenerate ARM test cases. + TII->RemoveBranch(*this); + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond, dl); + return; + } + // The block has a fallthrough conditional branch. - MachineBasicBlock *MBBA = *succ_begin(); - MachineBasicBlock *MBBB = *llvm::next(succ_begin()); - if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. 
Cond.clear(); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, MBBA, 0, Cond, dl); - } else if (!isLayoutSuccessor(MBBA)) { + TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, MBBA, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); } } } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 63892af..5ba6851 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -102,13 +102,13 @@ public: } /// \brief Iterator over blocks within the chain. - typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator; + typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; /// \brief Beginning of blocks within the chain. - iterator begin() const { return Blocks.begin(); } + iterator begin() { return Blocks.begin(); } /// \brief End of blocks within the chain. - iterator end() const { return Blocks.end(); } + iterator end() { return Blocks.end(); } /// \brief Merge a block chain into this one. /// @@ -141,6 +141,14 @@ public: } } +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + void dump() LLVM_ATTRIBUTE_USED { + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->dump(); + } +#endif // NDEBUG + /// \brief Count of predecessors within the loop currently being processed. /// /// This count is updated at each loop we process to represent the number of @@ -203,12 +211,15 @@ class MachineBlockPlacement : public MachineFunctionPass { void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = 0); - MachineBasicBlock *findBestLoopTop(MachineFunction &F, - MachineLoop &L, + MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); - void AlignLoops(MachineFunction &F); public: static char ID; // Pass identification, replacement for typeid @@ -430,7 +441,6 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), WBE = WorkList.end(); WBI != WBE; ++WBI) { - assert(!BlockFilter || BlockFilter->count(*WBI)); BlockChain &SuccChain = *BlockToChain[*WBI]; if (&SuccChain == &Chain) { DEBUG(dbgs() << " " << getBlockName(*WBI) @@ -533,16 +543,89 @@ void MachineBlockPlacement::buildChain( /// \brief Find the best loop top block for layout. /// +/// Look for a block which is strictly better than the loop header for laying +/// out at the top of the loop. This looks for one and only one pattern: +/// a latch block with no conditional exit. This block will cause a conditional +/// jump around it or will be the bottom of the loop if we lay it out in place, +/// but if it doesn't end up at the bottom of the loop for any reason, +/// rotation alone won't fix it. Because such a block will always result in an +/// unconditional jump (for the backedge), rotating it in front of the loop +/// header is always profitable.
+MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Check that the header hasn't been fused with a preheader block due to + // crazy branches. If it has, we need to start with the header at the top to + // prevent pulling the preheader into the loop body. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + DEBUG(dbgs() << "Finding best loop top for: " + << getBlockName(L.getHeader()) << "\n"); + + BlockFrequency BestPredFreq; + MachineBasicBlock *BestPred = 0; + for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), + PE = L.getHeader()->pred_end(); + PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (!LoopBlockSet.count(Pred)) + continue; + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + << Pred->succ_size() << " successors, " + << MBFI->getBlockFreq(Pred) << " freq\n"); + if (Pred->succ_size() > 1) + continue; + + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + if (!BestPred || PredFreq > BestPredFreq || + (!(PredFreq < BestPredFreq) && + Pred->isLayoutSuccessor(L.getHeader()))) { + BestPred = Pred; + BestPredFreq = PredFreq; + } + } + + // If no direct predecessor is fine, just use the loop header. + if (!BestPred) + return L.getHeader(); + + // Walk backwards through any straight line of predecessors. + while (BestPred->pred_size() == 1 && + (*BestPred->pred_begin())->succ_size() == 1 && + *BestPred->pred_begin() != L.getHeader()) + BestPred = *BestPred->pred_begin(); + + DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + return BestPred; +} + + +/// \brief Find the best loop exiting block for layout. +/// /// This routine implements the logic to analyze the loop looking for the best /// block to layout at the top of the loop. Typically this is done to maximize /// fallthrough opportunities. MachineBasicBlock * -MachineBlockPlacement::findBestLoopTop(MachineFunction &F, - MachineLoop &L, - const BlockFilterSet &LoopBlockSet) { +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, + MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. However, if the loop + // header has been pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return 0; + BlockFrequency BestExitEdgeFreq; + unsigned BestExitLoopDepth = 0; MachineBasicBlock *ExitingBB = 0; - MachineBasicBlock *LoopingBB = 0; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunities unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop. @@ -565,15 +648,10 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, // successor isn't found. MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; - // We also compute and store the best looping successor for use in layout.
- MachineBasicBlock *BestLoopSucc = 0; + bool HasLoopingSucc = false; // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights. This is only valid - // because it is purely a ranking function, we don't care about anything - // but the relative values. - uint32_t BestLoopSuccWeight = 0; - // FIXME: We also manually compute the probabilities to avoid quadratic - // behavior. + // the MBPI analysis, we use the internal weights and manually compute the + // probabilities to avoid quadratic behavior. uint32_t WeightScale = 0; uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale); for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(), @@ -585,10 +663,8 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, continue; BlockChain &SuccChain = *BlockToChain[*SI]; // Don't split chains, either this chain or the successor's chain. - if (&Chain == &SuccChain || *SI != *SuccChain.begin()) { - DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: " - : "exiting: ") - << getBlockName(*I) << " -> " + if (&Chain == &SuccChain) { + DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (chain conflict)\n"); continue; } @@ -597,60 +673,103 @@ MachineBlockPlacement::findBestLoopTop(MachineFunction &F, if (LoopBlockSet.count(*SI)) { DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> " << getBlockName(*SI) << " (" << SuccWeight << ")\n"); - if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight) - continue; - - BestLoopSucc = *SI; - BestLoopSuccWeight = SuccWeight; + HasLoopingSucc = true; continue; } + unsigned SuccLoopDepth = 0; + if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) { + SuccLoopDepth = ExitLoop->getLoopDepth(); + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(*I); + } + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> " - << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n"); + << getBlockName(*SI) << " [L:" << SuccLoopDepth + << "] (" << ExitEdgeFreq << ")\n"); // Note that we slightly bias this toward an existing layout successor to // retain incoming order in the absence of better information. // FIXME: Should we bias this more strongly? It's pretty weak. - if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq || + if (!ExitingBB || BestExitLoopDepth < SuccLoopDepth || + ExitEdgeFreq > BestExitEdgeFreq || ((*I)->isLayoutSuccessor(*SI) && !(ExitEdgeFreq < BestExitEdgeFreq))) { BestExitEdgeFreq = ExitEdgeFreq; ExitingBB = *I; } - - if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI)) - if (ExitLoop->contains(&L)) - BlocksExitingToOuterLoop.insert(*I); } // Restore the old exiting state, no viable looping successor was found. - if (!BestLoopSucc) { + if (!HasLoopingSucc) { ExitingBB = OldExitingBB; BestExitEdgeFreq = OldBestExitEdgeFreq; continue; } - - // If this was best exiting block thus far, also record the looping block. - if (ExitingBB == *I) - LoopingBB = BestLoopSucc; } - // Without a candidate exitting block or with only a single block in the + // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return L.getHeader(); + return 0; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. 
if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return L.getHeader(); + return 0; - assert(LoopingBB && "All successors of a loop block are exit blocks!"); DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); - DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n"); - return LoopingBB; + return ExitingBB; +} + +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock::pred_iterator PI = Top->pred_begin(), + PE = Top->pred_end(); + PI != PE; ++PI) { + BlockChain *PredChain = BlockToChain[*PI]; + if (!LoopBlockSet.count(*PI) && + (!PredChain || *PI == *llvm::prior(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *llvm::prior(LoopChain.end()); + for (MachineBasicBlock::succ_iterator SI = Bottom->succ_begin(), + SE = Bottom->succ_end(); + SI != SE; ++SI) { + BlockChain *SuccChain = BlockToChain[*SI]; + if (!LoopBlockSet.count(*SI) && + (!SuccChain || *SI == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = std::find(LoopChain.begin(), LoopChain.end(), + ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), llvm::next(ExitIt), LoopChain.end()); } /// \brief Forms basic block chains from the natural loop structures. @@ -669,8 +788,20 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector<MachineBasicBlock *, 16> BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet); - BlockChain &LoopChain = *BlockToChain[LayoutTop]; + // First check to see if there is an obviously preferable top block for the + // loop. This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. + MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. 
+ MachineBasicBlock *ExitingBB = 0; + if (LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain @@ -702,7 +833,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*Chain.begin()); } - buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -714,7 +846,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; } for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); - BCI != BCE; ++BCI) + BCI != BCE; ++BCI) { + dbgs() << " ... " << getBlockName(*BCI) << "\n"; if (!LoopBlockSet.erase(*BCI)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough @@ -724,6 +857,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" << " Bad block: " << getBlockName(*BCI) << "\n"; } + } if (!LoopBlockSet.empty()) { BadLoop = true; @@ -863,28 +997,33 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); -} -/// \brief Recursive helper to align a loop and any nested loops. -static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { - // Recurse through nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - AlignLoop(F, *I, Align); - - L->getTopBlock()->setAlignment(Align); -} - -/// \brief Align loop headers to target preferred alignments. -void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + // Walk through the backedges of the function now that we have fully laid out + // the basic blocks and align the destination of each backedge. We don't rely + // on the loop info here so that we can align backedges in unnatural CFGs and + // backedges that were introduced purely because of the loop rotations done + // during this layout pass. + // FIXME: This isn't quite right, we shouldn't align backedges that result + // from blocks being sunken below the exit block for the function. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; - unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) - AlignLoop(F, *I, Align); + SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); + BI != BE; ++BI) { + PreviousBlocks.insert(*BI); + // Set alignment on the destination of all the back edges in the new + // ordering. 
+ for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), + SE = (*BI)->succ_end(); + SI != SE; ++SI) + if (PreviousBlocks.count(*SI)) + (*SI)->setAlignment(Align); + } } bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { @@ -900,7 +1039,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - AlignLoops(F); BlockToChain.clear(); ChainAllocator.DestroyAll(); diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 9aa74f1..9730eaa 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -43,9 +43,12 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); private: + typedef SmallVector<unsigned, 4> DestList; + typedef DenseMap<unsigned, DestList> SourceMap; + void SourceNoLongerAvailable(unsigned Reg, - DenseMap<unsigned, unsigned> &SrcMap, - DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); bool CopyPropagateBlock(MachineBasicBlock &MBB); }; } @@ -57,24 +60,32 @@ INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", void MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, - DenseMap<unsigned, unsigned> &SrcMap, + SourceMap &SrcMap, DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { - DenseMap<unsigned, unsigned>::iterator SI = SrcMap.find(Reg); + SourceMap::iterator SI = SrcMap.find(Reg); if (SI != SrcMap.end()) { - unsigned MappedDef = SI->second; - // Source of copy is no longer available for propagation. - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. 
+ if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } } } for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) { SI = SrcMap.find(*AS); if (SI != SrcMap.end()) { - unsigned MappedDef = SI->second; - if (AvailCopyMap.erase(MappedDef)) { - for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) - AvailCopyMap.erase(*SR); + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + if (AvailCopyMap.erase(MappedDef)) { + for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR) + AvailCopyMap.erase(*SR); + } } } } @@ -125,10 +136,10 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, } bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion - DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map - DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map - DenseMap<unsigned, unsigned> SrcMap; // Src -> Def map + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map bool Changed = false; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { @@ -213,7 +224,10 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. - SrcMap[Src] = Def; + if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } continue; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 8ab8b18..d8c2f6a 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -195,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand * MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, uint64_t s, unsigned base_alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, - TBAAInfo); + TBAAInfo, Ranges); } MachineMemOperand * @@ -284,7 +285,13 @@ void MachineFunction::dump() const { } void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { - OS << "# Machine code for function " << Fn->getName() << ":\n"; + OS << "# Machine code for function " << Fn->getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? 
"SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; // Print Frame Information FrameInfo->print(*this, OS); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 43af1ad..e553a04 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -381,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, uint64_t s, unsigned int a, - const MDNode *TBAAInfo) + const MDNode *TBAAInfo, + const MDNode *Ranges) : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), - TBAAInfo(TBAAInfo) { + TBAAInfo(TBAAInfo), Ranges(Ranges) { assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 428a9d9..8c562cc 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -80,6 +80,14 @@ namespace { MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + // Exit blocks for CurLoop. + SmallVector<MachineBasicBlock*, 8> ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } + // Track 'estimated' register pressure. SmallSet<unsigned, 32> RegSeen; SmallVector<unsigned, 8> RegPressure; @@ -182,9 +190,9 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasAnyPHIUse - Return true if the specified register is used by any - /// phi node. - bool HasAnyPHIUse(unsigned Reg) const; + /// HasLoopPHIUse - Return true if the specified instruction is used by any + /// phi node in the current loop. + bool HasLoopPHIUse(const MachineInstr *MI) const; /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered @@ -197,7 +205,7 @@ namespace { /// CanCauseHighRegPressure - Visit BBs from header to current BB, /// check if hoisting an instruction of the given cost matrix can cause high /// register pressure. - bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost); + bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap); /// UpdateBackTraceRegPressure - Traverse the back trace from header to /// the current block and update their register pressures to reflect the @@ -348,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); CurPreheader = 0; + ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting // loops. @@ -356,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { continue; } + CurLoop->getExitBlocks(ExitBlocks); + if (!PreRegAlloc) HoistRegionPostRA(); else { @@ -478,6 +489,10 @@ void MachineLICM::ProcessMI(MachineInstr *MI, /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop /// invariants out to the preheader. void MachineLICM::HoistRegionPostRA() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + unsigned NumRegs = TRI->getNumRegs(); BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop. BitVector PhysRegClobbers(NumRegs); // Regs defined more than once. 
@@ -514,25 +529,46 @@ void MachineLICM::HoistRegionPostRA() { } } + // Gather the registers read / clobbered by the terminator. + BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) + TermRegs.set(*AS); + } + } + // Now evaluate whether the potential candidates qualify. // 1. Check if the candidate defined register is defined by another // instruction in the loop. // 2. If the candidate is a load from stack slot (always true for now), // check if the slot is stored anywhere in the loop. + // 3. Make sure the candidate's def does not clobber + // registers read by the terminator, and is not itself + // clobbered by the terminator. for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { if (Candidates[i].FI != INT_MIN && StoredFIs.count(Candidates[i].FI)) continue; - if (!PhysRegClobbers.test(Candidates[i].Def)) { + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { bool Safe = true; MachineInstr *MI = Candidates[i].MI; for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg() || MO.isDef() || !MO.getReg()) continue; - if (PhysRegDefs.test(MO.getReg()) || - PhysRegClobbers.test(MO.getReg())) { + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { // If it's using a non-loop-invariant register, then it's obviously // not safe to hoist. Safe = false; @@ -571,7 +607,6 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); - if (!Preheader) return; // Now move the instructions to the predecessor, inserting it before any // terminator instructions. @@ -931,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasAnyPHIUse - Return true if the specified register is used by any -/// phi node. -bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI->isPHI()) - return true; - // Look pass copies as well. - if (UseMI->isCopy()) { - unsigned Def = UseMI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Def) && - HasAnyPHIUse(Def)) - return true; +/// HasLoopPHIUse - Return true if the specified instruction is used by a +/// phi node and hoisting it could cause a copy to be inserted. +bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector<const MachineInstr*, 8> Work(1, MI); + do { + MI = Work.pop_back_val(); + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + // A PHI may cause a copy to be inserted. + if (UseMI->isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI.
+ if (CurLoop->contains(UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI->getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI->isCopy() && CurLoop->contains(UseMI)) + Work.push_back(UseMI); + } } - } + } while (!Work.empty()); return false; } @@ -1014,7 +1067,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { /// CanCauseHighRegPressure - Visit BBs from header to current BB, check /// if hoisting an instruction of the given cost matrix can cause high /// register pressure. -bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { +bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, + bool CheapInstr) { for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { if (CI->second <= 0) @@ -1023,6 +1077,12 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) { unsigned RCId = CI->first; unsigned Limit = RegLimit[RCId]; int Cost = CI->second; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr) + return true; + for (unsigned i = BackTrace.size(); i != 0; --i) { SmallVector<unsigned, 8> &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) @@ -1085,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; - // If the instruction is cheap, only hoist if it is re-materilizable. LICM - // will increase register pressure. It's probably not worth it if the - // instruction is cheap. - // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting - // these tend to help performance in low register pressure situation. The - // trade off is it may cause spill in high pressure situation. It will end up - // adding a store in the loop preheader. But the reload is no more expensive. - // The side benefit is these loads are frequently CSE'ed. - if (IsCheapInstruction(MI)) { - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; - } else { - // Estimate register pressure to determine whether to LICM the instruction. - // In low register pressure situation, we can be more aggressive about - // hoisting. Also, favors hoisting long latency instructions even in - // moderately high pressure situation. - // FIXME: If there are long latency loop-invariant instructions inside the - // loop at this point, why didn't the optimizer's LICM hoist them? - DenseMap<unsigned, int> Cost; - for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. 
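The code that follows turns those three effects into a per-register-class cost map: a def adds its class cost, a killed use subtracts it. A compressed sketch of that bookkeeping, with std::map standing in for DenseMap and a single made-up register class:

    #include <cstdio>
    #include <map>

    int main() {
      std::map<unsigned, int> Cost;  // register class ID -> pressure delta
      const unsigned GPRClass = 0;   // invented class ID for illustration

      Cost[GPRClass] += 1;  // the def becomes live across the whole loop
      Cost[GPRClass] -= 1;  // a killed use stops being live in the loop

      // A net delta of zero means hoisting is pressure-neutral, so a
      // CanCauseHighRegPressure-style check would let the hoist proceed.
      std::printf("GPR pressure delta: %d\n", Cost[GPRClass]);
      return 0;
    }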
+ + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } - unsigned RCId, RCCost; - getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); - if (MO.isDef()) { - if (HasHighOperandLatency(MI, i, Reg)) { - ++NumHighLatency; - return true; - } + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second += RCCost; - else - Cost.insert(std::make_pair(RCId, RCCost)); - } else if (isOperandKill(MO, MRI)) { - // Is a virtual register use is a kill, hoisting it out of the loop - // may actually reduce register pressure or be register pressure - // neutral. - DenseMap<unsigned, int>::iterator CI = Cost.find(RCId); - if (CI != Cost.end()) - CI->second -= RCCost; - else - Cost.insert(std::make_pair(RCId, -RCCost)); + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? + DenseMap<unsigned, int> Cost; + for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + unsigned RCId, RCCost; + getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost); + if (MO.isDef()) { + if (HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; } + Cost[RCId] += RCCost; + } else if (isOperandKill(MO, MRI)) { + // If a virtual register use is a kill, hoisting it out of the loop + // may actually reduce register pressure or be register pressure + // neutral. + Cost[RCId] -= RCCost; } + } - // Visit BBs from header to current BB, if hoisting this doesn't cause - // high register pressure, then it's safe to proceed. - if (!CanCauseHighRegPressure(Cost)) { - ++NumLowRP; - return true; - } + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } - // Do not "speculate" in high register pressure situation. If an - // instruction is not guaranteed to be executed in the loop, it's best to be - // conservative. - if (AvoidSpeculation && - (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) - return false; + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } - // High register pressure situation, only hoist if the instruction is going - // to be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) && - !MI.isInvariantLoad(AA)) - return false; + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; } - // If result(s) of this instruction is used by PHIs outside of the loop, then - // don't hoist it if the instruction because it will introduce an extra copy. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isDef()) - continue; - if (HasAnyPHIUse(MO.getReg())) - return false; + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; } return true; diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index 58e067b..cb204fd 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -18,6 +18,19 @@ using namespace llvm; void MachinePassRegistryListener::anchor() { } +/// setDefault - Set the default constructor by name. +void MachinePassRegistry::setDefault(StringRef Name) { + MachinePassCtor Ctor = 0; + for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { + if (R->getName() == Name) { + Ctor = R->getCtor(); + break; + } + } + assert(Ctor && "Unregistered pass name"); + setDefault(Ctor); +} + /// Add - Adds a function pass to the registration list. /// void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index f140dec..7ea1517 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true) { + : TRI(&TRI), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedPhysRegs.resize(TRI.getNumRegs()); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 364a244..1d3241b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -227,6 +227,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { assert(RemainingCount == 0 && "Instruction count mismatch!"); Scheduler->finishBlock(); } + Scheduler->finalizeSchedule(); DEBUG(LIS->print(dbgs())); return true; } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 830a876..74ba94d 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -202,6 +202,7 @@ namespace { void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); void calcRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); @@ -608,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { } // Ensure non-terminators don't follow terminators. - if (MI->isTerminator()) { + // Ignore predicated terminators formed by if conversion. 
+ // FIXME: If conversion shouldn't need to violate this rule. + if (MI->isTerminator() && !TII->isPredicated(MI)) { if (!FirstTerminator) FirstTerminator = MI; } else if (FirstTerminator) { @@ -656,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const unsigned Reg = MO->getReg(); if (!Reg) return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); - // Check Live Variables. - if (MI->isDebugValue()) { - // Liveness checks are not valid for debug values. - } else if (MO->isUse() && !MO->isUndef()) { - regsLiveInButUnused.erase(Reg); - - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", - MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) - addRegWithSubRegs(regsKilled, Reg); - - // Check that LiveVars knows this kill. - if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && - MO->isKill()) { - LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - if (std::find(VI.Kills.begin(), - VI.Kills.end(), MI) == VI.Kills.end()) - report("Kill missing from LiveVariables", MO, MONum); - } - - // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; - } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - - // Use of a dead register. - if (!regsLive.count(Reg)) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - // Reserved registers may be used even when 'dead'. - if (!isReserved(Reg)) - report("Using an undefined physical register", MO, MONum); - } else { - BBInfo &MInfo = MBBInfoMap[MI->getParent()]; - // We don't know which virtual registers are live in, so only complain - // if vreg was killed in this MBB. Otherwise keep track of vregs that - // must be live in. PHI instructions are handled separately. - if (MInfo.regsKilled.count(Reg)) - report("Using a killed virtual register", MO, MONum); - else if (!MI->isPHI()) - MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); - } - } - } else if (MO->isDef()) { - // Register defined. - // TODO: verify that earlyclobber ops are not used. - if (MO->isDead()) - addRegWithSubRegs(regsDead, Reg); - else - addRegWithSubRegs(regsDefined, Reg); - - // Verify SSA form. - if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && - llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) - report("Multiple virtual register defs in SSA form", MO, MONum); - - // Check LiveInts for a live range, but only for virtual registers. 
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && - !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { - assert(VNI && "NULL valno is not allowed"); - if (VNI->def != DefIdx && !MO->isEarlyClobber()) { - report("Inconsistent valno->def", MO, MONum); - *OS << "Valno " << VNI->id << " is not defined at " - << DefIdx << " in " << LI << '\n'; - } - } else { - report("No live range at def", MO, MONum); - *OS << DefIdx << " is not live in " << LI << '\n'; - } - } else { - report("Virtual register has no Live interval", MO, MONum); - } - } - } // Check register classes. if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -853,6 +753,115 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } } +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + bool isKill = false; + unsigned defIdx; + if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { + // A two-addr use counts as a kill if use and def are the same. + unsigned DefReg = MI->getOperand(defIdx).getReg(); + if (Reg == DefReg) + isKill = true; + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + report("Two-address instruction operands must be identical", MO, MONum); + } + } else + isKill = MO->isKill(); + + if (isKill) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (TargetRegisterInfo::isVirtualRegister(Reg) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (!LI.liveAt(UseIdx)) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + if (!isReserved(Reg)) + report("Using an undefined physical register", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. 
+ // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. + if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live range, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx && !MO->isEarlyClobber()) { + report("Inconsistent valno->def", MO, MONum); + *OS << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live range at def", MO, MONum); + *OS << DefIdx << " is not live in " << LI << '\n'; + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { BBInfo &MInfo = MBBInfoMap[MI->getParent()]; set_union(MInfo.regsKilled, regsKilled); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 6246c21..13d1bbc 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -37,8 +37,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, cl::desc("Disable tail duplication")); static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); -static cl::opt<bool> EnableBlockPlacement("enable-block-placement", - cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt<bool> DisableBlockPlacement("disable-block-placement", + cl::Hidden, cl::desc("Disable the probability-driven block placement, and " + "re-enable the old code placement pass")); static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, @@ -272,11 +273,6 @@ AnalysisID TargetPassConfig::addPass(char &ID) { return FinalID; } -void TargetPassConfig::printNoVerify(const char *Banner) const { - if (TM->shouldPrintMachineCode()) - PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); -} - void TargetPassConfig::printAndVerify(const char *Banner) const { if (TM->shouldPrintMachineCode()) PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); @@ -394,16 +390,16 @@ void TargetPassConfig::addMachinePasses() { // Expand pseudo instructions before second scheduling pass. addPass(ExpandPostRAPseudosID); - printNoVerify("After ExpandPostRAPseudos"); + printAndVerify("After ExpandPostRAPseudos"); // Run pre-sched2 passes. if (addPreSched2()) - printNoVerify("After PreSched2 passes"); + printAndVerify("After PreSched2 passes"); // Second pass scheduler. 
if (getOptLevel() != CodeGenOpt::None) { addPass(PostRASchedulerID); - printNoVerify("After PostRAScheduler"); + printAndVerify("After PostRAScheduler"); } // GC @@ -416,7 +412,7 @@ void TargetPassConfig::addMachinePasses() { addBlockPlacement(); if (addPreEmitPass()) - printNoVerify("After PreEmit passes"); + printAndVerify("After PreEmit passes"); } /// Add passes that optimize machine instructions in SSA form. @@ -601,24 +597,24 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { void TargetPassConfig::addMachineLateOptimization() { // Branch folding must be run after regalloc and prolog/epilog insertion. if (addPass(BranchFolderPassID) != &NoPassID) - printNoVerify("After BranchFolding"); + printAndVerify("After BranchFolding"); // Tail duplication. if (addPass(TailDuplicateID) != &NoPassID) - printNoVerify("After TailDuplicate"); + printAndVerify("After TailDuplicate"); // Copy propagation. if (addPass(MachineCopyPropagationID) != &NoPassID) - printNoVerify("After copy propagation pass"); + printAndVerify("After copy propagation pass"); } /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { AnalysisID ID = &NoPassID; - if (EnableBlockPlacement) { - // MachineBlockPlacement is an experimental pass which is disabled by - // default currently. Eventually it should subsume CodePlacementOpt, so - // when enabled, the other is disabled. + if (!DisableBlockPlacement) { + // MachineBlockPlacement is a new pass which subsumes the functionality of + // CodePlacementOpt. The old code placement pass can be restored by + // disabling block placement, but eventually it will be removed. + ID = addPass(MachineBlockPlacementID); } else { ID = addPass(CodePlacementOptID); @@ -628,6 +624,6 @@ void TargetPassConfig::addBlockPlacement() { if (EnableBlockPlacementStats) addPass(MachineBlockPlacementStatsID); - printNoVerify("After machine block placement."); + printAndVerify("After machine block placement."); } } diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 85119c9..b00eceb 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -14,11 +14,11 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" -#include "LiveRangeEdit.h" #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index f39a21c..77ee314 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" @@ -24,6 +23,7 @@ #include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -187,7 +187,7 @@ void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the @@ -287,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); if (!VirtReg.isSpillable()) return ~0u; - LiveRangeEdit LRE(VirtReg, SplitVRegs); + LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM); spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index feec3d4..3f2a617 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -16,7 +16,6 @@ #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" #include "SpillPlacement.h" @@ -29,6 +28,7 @@ #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -428,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio |= (1u << 30); } - Queue.push(std::make_pair(Prio, Reg)); + Queue.push(std::make_pair(Prio, ~Reg)); } LiveInterval *RAGreedy::dequeue() { if (Queue.empty()) return 0; - LiveInterval *LI = &LIS->getInterval(Queue.top().second); + LiveInterval *LI = &LIS->getInterval(~Queue.top().second); Queue.pop(); return LI; } @@ -1183,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, return 0; // Prepare split editor. - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); // Assign all edge bundles to the preferred candidate, or NoCand. @@ -1231,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit, SplitSpillMode); ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -1512,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this); SE->reset(LREdit); SE->openIntv(); @@ -1644,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // Finally spill VirtReg itself. 
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(VirtReg, NewVRegs, this); + LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this); spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 9fcf886..a284614 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,14 +31,15 @@ #define DEBUG_TYPE "regalloc" -#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" #include "Spiller.h" #include "VirtRegMap.h" #include "RegisterCoalescer.h" +#include "llvm/Module.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/MachineDominators.h" @@ -56,6 +57,7 @@ #include <limits> #include <memory> #include <set> +#include <sstream> #include <vector> using namespace llvm; @@ -69,6 +71,13 @@ pbqpCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden); +#ifndef NDEBUG +static cl::opt<bool> +pbqpDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif + namespace { /// @@ -187,7 +196,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, const RegSet &vregs) { typedef std::vector<const LiveInterval*> LIVector; - + ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots(); MachineRegisterInfo *mri = &mf->getRegInfo(); const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo(); @@ -224,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, } } - // Remove any physical registers which overlap. + RegSet overlappingPRegs; + + // Record physical registers whose ranges overlap. for (RegSet::const_iterator pregItr = pregs.begin(), pregEnd = pregs.end(); pregItr != pregEnd; ++pregItr) { @@ -235,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf, continue; } - if (!vregLI->overlaps(*pregLI)) { - continue; + if (vregLI->overlaps(*pregLI)) + overlappingPRegs.insert(preg); + } + + // Record any overlaps with regmask operands. + BitVector regMaskOverlaps(tri->getNumRegs()); + for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(), + rmEnd = regMaskSlots.end(); + rmItr != rmEnd; ++rmItr) { + SlotIndex rmIdx = *rmItr; + if (vregLI->liveAt(rmIdx)) { + MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx); + const uint32_t* regMask = 0; + for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(), + mopEnd = rmMI->operands_end(); + mopItr != mopEnd; ++mopItr) { + if (mopItr->isRegMask()) { + regMask = mopItr->getRegMask(); + break; + } + } + assert(regMask != 0 && "Couldn't find register mask."); + regMaskOverlaps.setBitsNotInMask(regMask); } + } + + for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) { + if (regMaskOverlaps.test(preg)) + overlappingPRegs.insert(preg); + } + + for (RegSet::const_iterator pregItr = overlappingPRegs.begin(), + pregEnd = overlappingPRegs.end(); + pregItr != pregEnd; ++pregItr) { + unsigned preg = *pregItr; // Remove the register from the allowed set. 
VRAllowed::iterator eraseItr = @@ -507,7 +550,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); SmallVector<LiveInterval*, 8> newSpills; - LiveRangeEdit LRE(lis->getInterval(vreg), newSpills); + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " @@ -633,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); + const Function* func = mf->getFunction(); + std::string fqn = + func->getParent()->getModuleIdentifier() + "." + + func->getName().str(); + (void)fqn; + // If there are non-empty intervals allocate them using pbqp. if (!vregsToAlloc.empty()) { @@ -644,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { std::auto_ptr<PBQPRAProblem> problem = builder->build(mf, lis, loopInfo, vregsToAlloc); + +#ifndef NDEBUG + if (pbqpDumpGraphs) { + std::ostringstream rs; + rs << round; + std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph"); + std::string tmp; + raw_fd_ostream os(graphFileName.c_str(), tmp); + DEBUG(dbgs() << "Dumping graph for round " << round << " to \"" + << graphFileName << "\"\n"); + problem->getGraph().dump(os); + } +#endif + PBQP::Solution solution = PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve( problem->getGraph()); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 2818f49..03bd82e 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -83,6 +83,11 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && "Target changed?"); + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + // Self-initialize. if (!MBB) { NumPhysRegs = TRI->getNumRegs(); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 6be1ab7..d46eb89 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -39,8 +39,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis), - IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT), - FirstDbgValue(0) { + IsPostRA(IsPostRAFlag), UnitLatencies(false), CanHandleTerminators(false), + LoopRegs(MLI, MDT), FirstDbgValue(0) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -554,7 +554,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) { continue; } - assert(!MI->isTerminator() && !MI->isLabel() && + assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 7c4db97..0914c66 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1080,6 +1080,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If the root changed (e.g. it was a dead load, update the root). 
DAG.setRoot(Dummy.getValue()); + DAG.RemoveDeadNodes(); } SDValue DAGCombiner::visit(SDNode *N) { @@ -1452,16 +1453,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); } } @@ -1547,16 +1546,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || - (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -2336,6 +2333,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { ORNode, N0.getOperand(1)); } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR) + && Level == AfterLegalizeVectorOps) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + // If both incoming values are integers, and the original types are the same. + if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op); + AddToWorkList(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations.
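The swizzle observation in the comment above is the elementwise identity op(shuffle(A, M), shuffle(B, M)) == shuffle(op(A, B), M). A small standalone check of that identity for xor, with arbitrary vector contents and mask:

    #include <cstdio>

    int main() {
      const unsigned A[4] = {0x1, 0x2, 0x3, 0x4};
      const unsigned B[4] = {0x10, 0x20, 0x30, 0x40};
      const int Mask[4] = {2, 0, 3, 1};  // same mask applied to both inputs

      unsigned C[4];  // xor(A, B) with no shuffles
      for (int i = 0; i != 4; ++i)
        C[i] = A[i] ^ B[i];

      bool Same = true;
      for (int i = 0; i != 4; ++i) {
        unsigned PreSwizzle = A[Mask[i]] ^ B[Mask[i]];  // xor(shuf(A), shuf(B))
        unsigned PostSwizzle = C[Mask[i]];              // shuf(xor(A, B))
        Same = Same && (PreSwizzle == PostSwizzle);
      }
      std::printf("swizzle commutes with xor: %s\n", Same ? "yes" : "no");
      return 0;
    }

Moving the shuffle after the operation replaces two shuffles with one, which is why the combine that follows only fires when both inputs use the same mask and each shuffles within a single vector.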
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N0.getOperand(1).getOpcode() == ISD::UNDEF && + N1.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() && + "Inputs to shuffles are not the same type"); + + unsigned NumElts = VT.getVectorNumElements(); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + bool SameMask = true; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx0 = SVN0->getMaskElt(i); + int Idx1 = SVN1->getMaskElt(i); + if (Idx0 != Idx1) { + SameMask = false; + break; + } + } + + if (SameMask) { + SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N1.getOperand(0)); + AddToWorkList(Op.getNode()); + return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + DAG.getUNDEF(VT), &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -3773,8 +3831,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -3782,7 +3839,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // If all of the bits input the to ctlz node are known to be zero, then // the result of the ctlz is "32" and the result of the shift is one. - APInt UnknownBits = ~KnownZero & Mask; + APInt UnknownBits = ~KnownZero; if (UnknownBits == 0) return DAG.getConstant(1, VT); // Otherwise, check to see if there is exactly one bit input to the ctlz. @@ -4298,12 +4355,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = TLI.getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == SVT.getSizeInBits()) return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -4317,11 +4379,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MatchingVectorType = EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + } } } @@ -4352,6 +4416,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return SDValue(); } +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// ComputeMaskedBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); + if (COp0 && COp0->isNullValue()) + Op = Op1; + else if (COp1 && COp1->isNullValue()) + Op = Op0; + else + return false; + + DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4369,16 +4471,17 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // (zext (truncate x)) -> (truncate x) // This is valid when the truncated bits of x are already zero. // FIXME: We should extend this to work for vectors too. - if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { - SDValue Op = N0.getOperand(0); - APInt TruncatedBits - = APInt::getBitsSet(Op.getValueSizeInBits(), - N0.getValueSizeInBits(), - std::min(Op.getValueSizeInBits(), - VT.getSizeInBits())); - APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne); - if (TruncatedBits == KnownZero) { + SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? 
+ APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); if (VT.bitsLT(Op.getValueType())) @@ -5280,7 +5383,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, N0.getOperand(0)); @@ -5667,6 +5771,24 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP && N1CFP && VT != MVT::ppcf128) return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + DAG.getConstantFP(Recip, VT)); + } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, @@ -5931,7 +6053,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading // constant pool values. - if (N0.getOpcode() == ISD::BITCAST && + if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && !VT.isVector() && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger()) { @@ -5967,7 +6089,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
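The fdiv-to-fmul rewrite above is gated on UnsafeFPMath because multiplying by a rounded reciprocal is not bit-exact in general; it is exact only when 1/c2 is exactly representable, as for powers of two. A minimal standalone sketch of that distinction (plain C++ with arbitrary constants, not the SelectionDAG API):

    #include <cstdio>
    int main() {
      double x = 3.0;
      // 1/8 is exactly representable, so x/8 and x*(1/8) agree bit for bit.
      std::printf("%d\n", x / 8.0 == x * (1.0 / 8.0));    // prints 1
      // 1/10 is rounded, so the rewrite can perturb the last bit.
      std::printf("%d\n", x / 10.0 == x * (1.0 / 10.0));  // prints 0 on IEEE-754 doubles
      return 0;
    }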
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); @@ -7628,8 +7751,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - assert(N0.getValueType().getVectorNumElements() == NumElts && - "Vector shuffle must be normalized in DAG"); + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); // Canonicalize shuffle undef, undef -> undef if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) @@ -7654,12 +7776,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SmallVector<int, 8> NewMask; for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); - if (Idx < 0) - NewMask.push_back(Idx); - else if (Idx < (int)NumElts) - NewMask.push_back(Idx + NumElts); - else - NewMask.push_back(Idx - NumElts); + if (Idx >= 0) { + if (Idx < (int)NumElts) + Idx += NumElts; + else + Idx -= NumElts; + } + NewMask.push_back(Idx); } return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), &NewMask[0]); @@ -7721,6 +7844,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return N0; } } + + // If this shuffle node is simply a swizzle of another shuffle node, + // and it reverses the swizzle of the previous shuffle then we can + // optimize shuffle(shuffle(x, undef), undef) -> x. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + N1.getOpcode() == ISD::UNDEF) { + + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // Shuffle nodes can only reverse shuffles with a single non-undef value. + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) + return SDValue(); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + assert(Idx < (int)NumElts && "Index references undef operand"); + // Next, this index comes from the first value, which is the incoming + // shuffle. Adopt the incoming index. + if (Idx >= 0) + Idx = OtherSV->getMaskElt(Idx); + + // The combined shuffle must map each index to itself. 
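The swizzle-reversal combine above composes the outer mask with the incoming one and requires the result to be the identity. The same composition, restated outside the DAG with hypothetical 4-element masks (-1 standing in for an undef lane, as in ShuffleVectorSDNode):

    #include <cstdio>
    int main() {
      int inner[4] = {2, 3, 0, 1};  // mask of the incoming shuffle
      int outer[4] = {2, 3, 0, 1};  // outer mask that undoes the swizzle
      for (int i = 0; i < 4; ++i) {
        // Adopt the incoming index, exactly as the combine's loop does.
        int idx = outer[i] < 0 ? -1 : inner[outer[i]];
        std::printf("element %d maps to %d\n", i, idx);  // i maps to i: identity
      }
      return 0;
    }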
+ if (Idx >= 0 && (unsigned)Idx != i) + return SDValue(); + } + + return OtherSV->getOperand(0); + } + return SDValue(); } @@ -7796,7 +7953,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue Elt = RHS.getOperand(i); if (!isa<ConstantSDNode>(Elt)) return SDValue(); - else if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) + + if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) Indices.push_back(NumElts); @@ -7991,8 +8149,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if ((LLD->hasAnyUseOfValue(1) && (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || - (LLD->hasAnyUseOfValue(1) && - (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS)))) + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 9f4a44a..0c1ac69 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::SRA; } + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (ResultReg == 0) return false; @@ -592,7 +599,18 @@ bool FastISel::SelectCall(const User *I) { if (!Reg) Reg = lookUpRegForValue(Address); - if (!Reg && isa<Instruction>(Address) && + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Reg && !Address->use_empty() && isa<Instruction>(Address) && (!isa<AllocaInst>(Address) || !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) Reg = FuncInfo.InitializeRegForValue(Address); @@ -803,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // The unconditional fall-through case, which needs no instructions. + + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. } else { // The unconditional branch case. 
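The FastISel change above turns "urem x, pow2" into "and x, pow2-1"; the identity holds only for unsigned remainder, which is why only ISD::UREM is matched. A quick standalone check with arbitrary values:

    #include <cstdio>
    int main() {
      unsigned x = 1234567u, pow2 = 64u;                 // any power of two
      std::printf("%u %u\n", x % pow2, x & (pow2 - 1));  // identical results
      int s = -7;
      std::printf("%d %d\n", s % 8, s & 7);              // -7 vs 1: signed rem is excluded
      return 0;
    }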
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1b84b13..a96a997 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -718,10 +718,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: - case ISD::VAARG: case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::EXTRACT_VECTOR_ELT: @@ -1762,11 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - // The input chain to this libcall is the entry node of the function. - // Legalizing the call will automatically add the previous call to the - // dependence. - SDValue InChain = DAG.getEntryNode(); - TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -1782,9 +1782,19 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. - bool isTailCall = isInTailCallPosition(DAG, Node, TLI); + SDValue TCChain = InChain; + bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + if (isTailCall) + InChain = TCChain; + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, @@ -1820,7 +1830,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, + false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); @@ -3528,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SINT_TO_FP, dl); Results.push_back(Tmp1); break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. 
+ + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + ReplacedNode(Node); + break; + } case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -3601,6 +3638,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FDIV: + case ISD::FREM: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 41506d1..95ddb1e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1362,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. if (((KnownZero|KnownOne) & HighBitMask) == 0) diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 69c2100..e866445 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,7 @@ private: SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); SDValue ScalarizeVecRes_SETCC(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3ae8345..9fe4480 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -417,7 +417,8 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); - return DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index a8aee12..5f23f01 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -58,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; case 
ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; @@ -226,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { return InOp; } +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(ISD::SELECT, N->getDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f44adfc..2cb5d37 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1587,6 +1587,7 @@ protected: std::vector<SUnit*> Queue; unsigned CurQueueId; bool TracksRegPressure; + bool SrcOrder; // SUnits - The SUnits for the current graph. std::vector<SUnit> *SUnits; @@ -1612,11 +1613,12 @@ public: RegReductionPQBase(MachineFunction &mf, bool hasReadyFilter, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1731,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase { public: RegReductionPriorityQueue(MachineFunction &mf, bool tracksrp, + bool srcorder, const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), Picker(this) {} bool isBottomUp() const { return SF::IsBottomUp; } @@ -2625,7 +2629,7 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { if (!Disable2AddrHack) AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) + if (!TracksRegPressure && !SrcOrder) PrescheduleNodesWithMultipleUses(); // Calculate node priorities. 
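The VSELECT scalarization above must translate between the target's vector and scalar encodings of true. A toy restatement of the two conversions it emits, using plain integers as stand-ins for the AND-with-one and sign-extend-in-register nodes:

    #include <cstdio>
    int main() {
      int vecLane = -1;             // vector "true" under ZeroOrNegativeOne content
      int scalar = vecLane & 1;     // the emitted AND with 1: scalar 0-or-1 form
      std::printf("%d\n", scalar);  // 1
      int widened = -(scalar & 1);  // sign-extend the low bit: back to all ones
      std::printf("0x%08x\n", (unsigned)widened);  // 0xffffffff
      return 0;
    }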
CalculateSethiUllmanNumbers(); @@ -2948,7 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2962,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2977,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = - new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); @@ -2993,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetLowering *TLI = &IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = - new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e3a7305..92671d1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -62,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown FP format"); + case MVT::f16: return &APFloat::IEEEhalf; case MVT::f32: return &APFloat::IEEEsingle; case MVT::f64: return &APFloat::IEEEdouble; case MVT::f80: return &APFloat::x87DoubleExtended; @@ -1042,7 +1043,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, @@ -1627,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1636,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// known to be either zero or one and return them in the KnownZero/KnownOne /// bitsets. This code only analyzes bits in Mask, in order to short-circuit /// processing. 
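Ahead of the rewritten ComputeMaskedBits below, a toy restatement of its bookkeeping, assuming 8-bit values for brevity: a constant has every bit known, and a bit must never be known to be both zero and one (the recurring assertion):

    #include <cstdio>
    #include <cstdint>
    int main() {
      // A constant has every bit known: KnownOne = C, KnownZero = ~C.
      uint8_t C = 0xB2;
      uint8_t KnownOne = C, KnownZero = (uint8_t)~C;
      std::printf("%d\n", (KnownOne & KnownZero) == 0);  // the asserted invariant
      // AND: a result bit is known zero if either operand has it known zero.
      uint8_t KnownZeroRHS = 0x0F;
      std::printf("0x%02x\n", (unsigned)(uint8_t)(KnownZero | KnownZeroRHS));
      return 0;
    }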
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, - APInt &KnownZero, APInt &KnownOne, - unsigned Depth) const { - unsigned BitWidth = Mask.getBitWidth(); - assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() && - "Mask size mismatches value type size!"); +void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. - if (Depth == 6 || Mask == 0) + if (Depth == 6) return; // Limit search depth. APInt KnownZero2, KnownOne2; @@ -1652,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask; - KnownZero = ~KnownOne & Mask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1669,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero |= KnownZero2; return; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne, - KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1681,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownOne |= KnownOne2; return; case ISD::XOR: { - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1694,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::MUL: { - APInt Mask2 = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1715,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, LeadZ = std::min(LeadZ, BitWidth); KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); - 
KnownZero &= Mask; return; } case ISD::UDIV: { // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(Op.getOperand(1), - AllOnes, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); return; } case ISD::SELECT: - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1750,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, KnownZero &= KnownZero2; return; case ISD::SELECT_CC: - ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); @@ -1783,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShAmt; KnownOne <<= ShAmt; @@ -1801,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt), - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } return; @@ -1819,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (ShAmt >= BitWidth) return; - APInt InDemandedMask = (Mask << ShAmt); // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. 
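The SRL case above shifts the known bits right and then marks the vacated high bits as known zero, which is sound because a logical shift always fills with zeros. A standalone check of that fact on an arbitrary 32-bit value:

    #include <cstdio>
    #include <cstdint>
    int main() {
      uint32_t x = 0xDEADBEEFu;
      uint32_t y = x >> 8;  // logical shift right fills with zeros
      std::printf("%d\n", (y & 0xFF000000u) == 0u);  // 1: high 8 bits known zero
      return 0;
    }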
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask; - if (HighBits.getBoolValue()) - InDemandedMask |= APInt::getSignBit(BitWidth); + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1849,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Sign extension. Compute the demanded bits in the result that are not // present in the input. - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); APInt InSignBit = APInt::getSignBit(EBits); - APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -1860,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, - KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the @@ -1889,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); if (ISD::isZEXTLoad(Op.getNode())) { - LoadSDNode *LD = cast<LoadSDNode>(Op); EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + computeMaskedBitsLoad(*Ranges, KnownZero); } return; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; @@ -1914,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); APInt InSignBit = APInt::getSignBit(InBits); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask; - APInt InMask = Mask.trunc(InBits); - - // If any of the sign extended bits are demanded, we know that the sign - // bit is demanded. Temporarily set this bit in the mask for our callee. 
- if (NewBits.getBoolValue()) - InMask |= InSignBit; + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. bool SignBitKnownZero = KnownZero.isNegative(); @@ -1932,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, assert(!(SignBitKnownZero && SignBitKnownOne) && "Sign bit can't be known to be both zero and one!"); - // If the sign bit wasn't actually demanded by our caller, we don't - // want it set in the KnownZero and KnownOne result values. Reset the - // mask and reapply it to the result values. - InMask = Mask.trunc(InBits); - KnownZero &= InMask; - KnownOne &= InMask; - KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -1952,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.trunc(InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); return; @@ -1963,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InMask = Mask.zext(InBits); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); @@ -1975,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, - KnownOne, Depth+1); - KnownZero |= (~InMask) & Mask; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); return; } case ISD::FGETSIGN: @@ -1994,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2003,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. 
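The sign-extension cases above all rest on one fact: once the narrow sign bit is known, every widened top bit is known to match it. A quick demonstration with an arbitrary 8-to-32-bit widening:

    #include <cstdio>
    #include <cstdint>
    int main() {
      int8_t narrow = -0x40;  // sign bit set
      int32_t wide = narrow;  // SIGN_EXTEND
      std::printf("0x%08x\n", (unsigned)(uint32_t)wide);  // 0xffffffc0: top bits known one
      narrow = 0x40;          // sign bit clear
      wide = narrow;
      std::printf("0x%08x\n", (unsigned)(uint32_t)wide);  // 0x00000040: top bits known zero
      return 0;
    }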
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); } } } @@ -2014,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - APInt Mask2 = APInt::getLowBitsSet(BitWidth, - BitWidth - Mask.countLeadingZeros()); - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); @@ -2044,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, if (RA.isPowerOf2()) { APInt LowBits = RA - 1; APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); - ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2059,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // the upper bits are all one. if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - - KnownZero &= Mask; - KnownOne &= Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } @@ -2072,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - APInt Mask2 = LowBits & Mask; - KnownZero |= ~LowBits & Mask; - ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + KnownZero |= ~LowBits; + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); break; } @@ -2082,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, - Depth+1); - ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); return; } case ISD::FrameIndex: @@ -2111,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. 
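The SREM and UREM cases above encode two remainder facts: an unsigned remainder by a power of two clears all high bits, and a signed remainder by a power of two leaves the operand's low bits unchanged. A standalone check (C++ '%' truncates toward zero, matching ISD::SREM):

    #include <cstdio>
    #include <cstdint>
    int main() {
      uint32_t x = 0x12345678u;
      std::printf("%d\n", (x % 16u) == (x & 15u));       // UREM: high bits known zero
      int32_t s = -52;
      std::printf("%d\n", ((s % 16) & 15) == (s & 15));  // SREM: low bits unchanged
      return 0;
    }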
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this, - Depth); + TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2236,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -2262,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((KnownZero | APInt(VTBits, 1)) == Mask) + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -2315,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VTBits); - ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -2471,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); - case ISD::FP_ROUND: case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -3037,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, default: break; } } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(*EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, VT); + } } // Canonicalize an UNDEF to the RHS, even over a constant. 
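The ISD::FP_ROUND constant folding just added converts the APFloat and deliberately ignores the inexact status. The same rounding is visible in a plain C++ narrowing conversion; the value here is chosen one double ULP above 1.0:

    #include <cstdio>
    int main() {
      double d = 1.0000000000000002;   // one double ULP above 1.0
      float f = (float)d;              // the narrowing is inexact; the fold ignores that
      std::printf("%d\n", f == 1.0f);  // prints 1: rounded to the nearest float
      return 0;
    }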
@@ -4170,7 +4136,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const MDNode *TBAAInfo) { + unsigned Alignment, const MDNode *TBAAInfo, + const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4192,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo); + TBAAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4248,11 +4215,12 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, - const MDNode *TBAAInfo) { + const MDNode *TBAAInfo, + const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo, Ranges); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -6036,10 +6004,9 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { int64_t GVOffset = 0; if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); - APInt AllOnes = APInt::getAllOnesValue(PtrWidth); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), AllOnes, - KnownZero, KnownOne, TLI.getTargetData()); + llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne, + TLI.getTargetData()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2ac9655..94cb958 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2804,11 +2804,11 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { } // Utility for visitShuffleVector - Return true if every element in Mask, -// begining // from position Pos and ending in Pos+Size, falls within the +// beginning from position Pos and ending in Pos+Size, falls within the // specified sequential range [L, L+Pos), or is undef.
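The utility described in the comment above (and defined next) scans a window of the mask and lets undef entries match anything. A standalone restatement with std::vector in place of SmallVectorImpl and a hypothetical 8-element mask:

    #include <cstdio>
    #include <vector>
    // Same contract as the utility above: undef (-1) entries always match.
    static bool isSequentialInRange(const std::vector<int> &Mask,
                                    unsigned Pos, unsigned Size, int Low) {
      for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
        if (Mask[i] >= 0 && Mask[i] != Low)
          return false;
      return true;
    }
    int main() {
      std::vector<int> Mask = {4, 5, -1, 7, 0, 1, 2, 3};
      std::printf("%d\n", isSequentialInRange(Mask, 0, 4, 4));  // 1: 4,5,undef,7
      std::printf("%d\n", isSequentialInRange(Mask, 4, 4, 0));  // 1: 0,1,2,3
      return 0;
    }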
static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, - int Pos, int Size, int Low) { - for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; @@ -2878,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx); - else - MappedOps.push_back(Idx + MaskNumElts - SrcNumElts); + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2893,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Analyze the access pattern of the vector to see if we can extract // two subvectors and do the shuffle. The analysis is done by calculating // the range of elements the mask access on both vectors. - int MinRange[2] = { static_cast<int>(SrcNumElts+1), - static_cast<int>(SrcNumElts+1)}; + int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; int MaxRange[2] = {-1, -1}; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - int Input = 0; + unsigned Input = 0; if (Idx < 0) continue; @@ -2915,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Check if the access is smaller than the vector size and can we find // a reasonable extract index. - int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not - // Extract. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. int StartIdx[2]; // StartIdx to extract from - for (int Input=0; Input < 2; ++Input) { - if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) { + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { RangeUse[Input] = 0; // Unused StartIdx[Input] = 0; - } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) { - // Fits within range but we should see if we can find a good - // start index that is a multiple of the mask length. - if (MaxRange[Input] < (int)MaskNumElts) { - RangeUse[Input] = 1; // Extract from beginning of the vector - StartIdx[Input] = 0; - } else { - StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; - if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && - StartIdx[Input] + MaskNumElts <= SrcNumElts) - RangeUse[Input] = 1; // Extract from a multiple of the mask length. - } + continue; } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. } if (RangeUse[0] == 0 && RangeUse[1] == 0) { setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. return; } - else if (RangeUse[0] < 2 && RangeUse[1] < 2) { + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { // Extract appropriate subvector and generate a vector shuffle - for (int Input=0; Input < 2; ++Input) { + for (unsigned Input = 0; Input < 2; ++Input) { SDValue &Src = Input == 0 ? 
Src1 : Src2; if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); @@ -2956,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector<int, 8> MappedOps; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; - if (Idx < 0) - MappedOps.push_back(Idx); - else if (Idx < (int)SrcNumElts) - MappedOps.push_back(Idx - StartIdx[0]); - else - MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts); + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); } setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, @@ -2977,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { EVT PtrVT = TLI.getPointerTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { - if (Mask[i] < 0) { - Ops.push_back(DAG.getUNDEF(EltVT)); - } else { - int Idx = Mask[i]; - SDValue Res; + int Idx = Mask[i]; + SDValue Res; - if (Idx < (int)SrcNumElts) - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src1, DAG.getConstant(Idx, PtrVT)); - else - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src2, - DAG.getConstant(Idx - SrcNumElts, PtrVT)); + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Ops.push_back(Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), + EltVT, Src, DAG.getConstant(Idx, PtrVT)); } + + Ops.push_back(Res); } setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), @@ -3215,6 +3209,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; @@ -3262,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo, + Ranges); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -3586,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } setValue(&I, Result); + } else { + // Assign order to result here. If the intrinsic does not produce a result, + // it won't be mapped to a SDNode and visit() will not assign it an order + // number. + ++SDNodeOrder; + AssignOrderingToNode(Result.getNode()); } } @@ -3627,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); } -// implVisitAluOverflow - Lower arithmetic overflow instrinsics. -const char * -SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); - return 0; -} - /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
void @@ -4397,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); - else - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); + if (CFR.getOpcode() == ISD::TRUNCATE) + return getTruncatedArgReg(CFR); } return 0; } @@ -4428,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) - Reg = TRI->getFrameRegister(MF); + Reg = TRI->getFrameRegister(MF); if (!Reg && N.getNode()) { if (N.getOpcode() == ISD::CopyFromReg) @@ -4690,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { V = BCI->getOperand(0); const AllocaInst *AI = dyn_cast<AllocaInst>(V); // Don't handle byval struct arguments or VLAs, for example. - if (!AI) + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return 0; + } DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI == FuncInfo.StaticAllocaMap.end()) @@ -4837,7 +4830,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::x86_avx_vinsertf128_pd_256: case Intrinsic::x86_avx_vinsertf128_ps_256: - case Intrinsic::x86_avx_vinsertf128_si_256: { + case Intrinsic::x86_avx_vinsertf128_si_256: + case Intrinsic::x86_avx2_vinserti128: { DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); @@ -4861,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: { ISD::CvtCode Code = ISD::CVT_INVALID; switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::convertff: Code = ISD::CVT_FF; break; case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; case Intrinsic::convertfui: Code = ISD::CVT_FU; break; @@ -5093,18 +5088,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::uadd_with_overflow: - return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: - return implVisitAluOverflow(I, ISD::SADDO); case Intrinsic::usub_with_overflow: - return implVisitAluOverflow(I, ISD::USUBO); case Intrinsic::ssub_with_overflow: - return implVisitAluOverflow(I, ISD::SSUBO); case Intrinsic::umul_with_overflow: - return implVisitAluOverflow(I, ISD::UMULO); - case Intrinsic::smul_with_overflow: - return implVisitAluOverflow(I, ISD::SMULO); + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + return 0; + } case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8cf88e1..8393b41 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -556,8 +556,6 @@ private: void visitUserOp2(const Instruction &I) { llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } - - const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op); void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 8aabc02..605509b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -508,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { Worklist.push_back(CurDAG->getRoot().getNode()); - APInt Mask; APInt KnownZero; APInt KnownOne; @@ -539,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); - CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -1444,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne); + CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 792de75..e341e15 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -39,28 +39,6 @@ static cl::opt<bool> AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true), cl::desc("Allow promotion of integer vector element types")); -namespace llvm { -TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) { - bool isLocal = GV->hasLocalLinkage(); - bool isDeclaration = GV->isDeclaration(); - // FIXME: what should we do for protected and internal visibility? - // For variables, is internal different from hidden? - bool isHidden = GV->hasHiddenVisibility(); - - if (reloc == Reloc::PIC_) { - if (isLocal || isHidden) - return TLSModel::LocalDynamic; - else - return TLSModel::GeneralDynamic; - } else { - if (!isDeclaration || isHidden) - return TLSModel::LocalExec; - else - return TLSModel::InitialExec; - } -} -} - /// InitLibcallNames - Set default libcall names. 
/// static void InitLibcallNames(const char **Names) { @@ -1101,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const { SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { // If our PIC model is GP relative, use the global offset table as the base. - if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; } @@ -1244,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -1263,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, switch (Op.getOpcode()) { case ISD::Constant: // We know all of the bits for a constant! - KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; - KnownZero = ~KnownOne & NewMask; + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; return false; // Don't fall through, will infinitely loop. case ISD::AND: // If the RHS is a constant, check to see if the LHS would be zero without @@ -1274,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, - LHSZero, LHSOne, Depth); + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -1386,8 +1367,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known - if ((KnownOne & KnownOne2) == KnownOne) { + // NB: it is okay if more bits are known than are requested + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if (KnownOne == KnownOne2) { // set bits are the same on both sides EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, @@ -1725,11 +1707,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known one, the top bits match. if (KnownOne.intersects(InSignBit)) { - KnownOne |= NewBits; - KnownZero &= ~NewBits; + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); } else { // Otherwise, top bits aren't known. - KnownOne &= ~NewBits; - KnownZero &= ~NewBits; + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); } break; } @@ -1863,7 +1845,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // FALL THROUGH default: // Just use ComputeMaskedBits to compute output bits. 
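// (A hedged sketch of the API migration this patch applies throughout:
// ComputeMaskedBits is assumed to have dropped its Mask parameter and to
// compute KnownZero/KnownOne for every bit of the operand, e.g.
//
//   APInt KnownZero, KnownOne;
//   TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
//
// Callers that only care about some bits, such as CheckOrMask above, now
// mask the results themselves: (NeededMask & KnownOne) == NeededMask.)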
- TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth); + TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1879,7 +1861,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -1890,7 +1871,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, Op.getOpcode() == ISD::INTRINSIC_VOID) && "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } /// ComputeNumSignBitsForTargetNode - This method can be implemented by @@ -1934,9 +1915,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -2432,8 +2412,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) - return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + if (N0.getValueType().isInteger()) { + switch (getBooleanContents(N0.getValueType().isVector())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); + case ZeroOrNegativeOneBooleanContent: + return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT); + } + } unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT); @@ -2467,6 +2454,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. + bool LegalRHSImm = false; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 @@ -2501,25 +2492,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); } // Simplify (X+Z) == X --> Z == 0 - if (N0.getOperand(0) == N1) - return DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(0, N0.getValueType()), Cond); - if (N0.getOperand(1) == N1) { - if (DAG.isCommutativeBinOp(N0.getOpcode())) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { - assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); - // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), - N1, + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. 
It could be an induction variable + // chain, and the transform would increase register pressure. + if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, N0.getValueType()), Cond); + else if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); - if (!DCI.isCalledByLegalizer()) - DCI.AddToWorklist(SH.getNode()); - return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } } } } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index c5bd3a3..26cf259 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -34,7 +34,8 @@ void SlotIndexes::releaseMemory() { mi2iMap.clear(); MBBRanges.clear(); idx2MBBMap.clear(); - clearList(); + indexList.clear(); + ileAllocator.Reset(); } bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { @@ -45,17 +46,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // iterator in lock-step (though skipping it over indexes which have // null pointers in the instruction field). // At each iteration assert that the instruction pointed to in the index - // is the same one pointed to by the MI iterator. This + // is the same one pointed to by the MI iterator. This // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should // only need to be set up once after the first numbering is computed. mf = &fn; - initList(); // Check that the list contains only the sentinal. - assert(indexListHead->getNext() == 0 && - "Index list non-empty at initial numbering?"); + assert(indexList.empty() && "Index list non-empty at initial numbering?"); assert(idx2MBBMap.empty() && "Index -> MBB mapping non-empty at initial numbering?"); assert(MBBRanges.empty() && @@ -68,7 +67,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); - push_back(createEntry(0, index)); + indexList.push_back(createEntry(0, index)); // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); @@ -76,7 +75,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. - SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); + SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -85,20 +84,20 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { continue; // Insert a store index for the instr. - push_back(createEntry(mi, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(), SlotIndex::Slot_Block))); - + ++functionSize; } // We insert one blank instructions between basic blocks. 
- push_back(createEntry(0, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(), SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -119,38 +118,37 @@ void SlotIndexes::renumberIndexes() { unsigned index = 0; - for (IndexListEntry *curEntry = front(); curEntry != getTail(); - curEntry = curEntry->getNext()) { - curEntry->setIndex(index); + for (IndexList::iterator I = indexList.begin(), E = indexList.end(); + I != E; ++I) { + I->setIndex(index); index += SlotIndex::InstrDist; } } -// Renumber indexes locally after curEntry was inserted, but failed to get a new +// Renumber indexes locally after curItr was inserted, but failed to get a new // index. -void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) { +void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { // Number indexes with half the default spacing so we can catch up quickly. const unsigned Space = SlotIndex::InstrDist/2; assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); - IndexListEntry *start = curEntry->getPrev(); - unsigned index = start->getIndex(); - IndexListEntry *tail = getTail(); + IndexList::iterator startItr = prior(curItr); + unsigned index = startItr->getIndex(); do { - curEntry->setIndex(index += Space); - curEntry = curEntry->getNext(); + curItr->setIndex(index += Space); + ++curItr; // If the next index is bigger, we have caught up. - } while (curEntry != tail && curEntry->getIndex() <= index); + } while (curItr != indexList.end() && curItr->getIndex() <= index); - DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-' + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-' << index << " ***\n"); ++NumLocalRenum; } void SlotIndexes::dump() const { - for (const IndexListEntry *itr = front(); itr != getTail(); - itr = itr->getNext()) { + for (IndexList::const_iterator itr = indexList.begin(); + itr != indexList.end(); ++itr) { dbgs() << itr->getIndex() << " "; if (itr->getInstr() != 0) { @@ -168,7 +166,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "Berd"[getSlot()]; + os << listEntry()->getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index b72dea7..4cd22eb 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -11,8 +11,8 @@ #include "Spiller.h" #include "VirtRegMap.h" -#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -116,7 +116,7 @@ protected: } // Create a new vreg & interval for this instr. - LiveInterval *newLI = &LRE.create(*lis, *vrm); + LiveInterval *newLI = &LRE.create(); newLI->weight = HUGE_VALF; // Update the reg operands & kill flags. 
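The Spiller hunk above and the SplitKit hunks below are one mechanical migration: LiveRangeEdit now captures the analyses it needs up front, so call sites stop threading LIS, VRM, and TII through every call. A minimal before/after sketch, built only from the call shapes visible in this patch (the new constructor is not part of this diff and is assumed to take those analyses):

    // Before: every helper call repeats the analyses.
    LiveInterval *newLI = &LRE.create(*lis, *vrm);
    Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);

    // After: the LiveRangeEdit object already knows them.
    LiveInterval *newLI = &LRE.create();
    Edit->eliminateDeadDefs(Dead);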
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index ab9b524..9959f74 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -14,10 +14,10 @@ #define DEBUG_TYPE "regalloc" #include "SplitKit.h" -#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -351,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(LIS, TII, 0); + Edit->anyRematerializable(0); } void SplitEditor::dump() const { @@ -436,8 +436,8 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, // Attempt cheap-as-a-copy rematerialization. LiveRangeEdit::Remat RM(ParentVNI); - if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); ++NumRemats; } else { // Can't remat, just insert a copy from parent. @@ -456,11 +456,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(LIS, VRM); + Edit->create(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(LIS, VRM); + Edit->create(); return OpenIdx; } @@ -1033,7 +1033,7 @@ void SplitEditor::deleteRematVictims() { if (Dead.empty()) return; - Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); + Edit->eliminateDeadDefs(Dead); } void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { @@ -1108,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { SmallVector<LiveInterval*, 8> dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create(LIS, VRM)); + dups.push_back(&Edit->create()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1116,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index be25855..2beb928 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -78,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? 
MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); bool Reg1IsKill = MI->getOperand(Idx1).isKill(); bool Reg2IsKill = MI->getOperand(Idx2).isKill(); // If destination is tied to either of the commuted source register, then @@ -86,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { Reg2IsKill = false; Reg0 = Reg2; + SubReg0 = SubReg2; } else if (HasDef && Reg0 == Reg2 && MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { Reg1IsKill = false; Reg0 = Reg1; + SubReg0 = SubReg1; } if (NewMI) { @@ -98,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, MachineFunction &MF = *MI->getParent()->getParent(); if (HasDef) return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead)) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0) + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); else return BuildMI(MF, MI->getDebugLoc(), MI->getDesc()) - .addReg(Reg2, getKillRegState(Reg2IsKill)) - .addReg(Reg1, getKillRegState(Reg2IsKill)); + .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2) + .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1); } - if (HasDef) + if (HasDef) { MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } MI->getOperand(Idx2).setReg(Reg1); MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); MI->getOperand(Idx2).setIsKill(Reg1IsKill); MI->getOperand(Idx1).setIsKill(Reg2IsKill); return MI; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 24b8bc2..c30b133 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1183,8 +1183,9 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if the tied operands -/// are eliminated altogether. +/// coalescing away the register copy. Returns true if no copy needs to be +/// inserted to untie mi's operands (either because they were untied, or +/// because mi was rescheduled, and will be visited again later). bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1380,7 +1381,6 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, /// runOnMachineFunction - Reduce two-address instructions to two operands. /// bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n"); const TargetMachine &TM = MF.getTarget(); MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); @@ -1595,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); - } - // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. 
- if (mi->isInsertSubreg()) { - // From %reg = INSERT_SUBREG %reg, %subreg, subidx - // To %reg:subidx = COPY %subreg - unsigned SubIdx = mi->getOperand(3).getImm(); - mi->RemoveOperand(3); - assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); - mi->getOperand(0).setSubReg(SubIdx); - mi->RemoveOperand(1); - mi->setDesc(TII->get(TargetOpcode::COPY)); - DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } } // Clear TiedOperands here instead of at the top of the loop @@ -1833,6 +1833,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { SmallSet<unsigned, 4> Seen; for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); unsigned SubIdx = MI->getOperand(i+1).getImm(); // DefMI of NULL means the value does not have a vreg in this block // i.e., its a physical register or a subreg. @@ -1888,7 +1889,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addReg(DstReg, RegState::Define, SubIdx) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx); MI->getOperand(i).setReg(0); if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) LV->replaceKillInstruction(SrcReg, MI, CopyMI); diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index 1c7b9d7..ee2a3ab 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -348,7 +348,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { } if (cu_relative_offset) - OS << format(" => {0x%8.8"PRIx64"}", (uvalue + (cu ? cu->getOffset() : 0))); + OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0)); } const char* diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 2890174..a744d0c 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -402,14 +402,15 @@ ExecutionEngine *ExecutionEngine::create(Module *M, std::string *ErrorStr, CodeGenOpt::Level OptLevel, bool GVsWithCode) { - return EngineBuilder(M) + EngineBuilder EB = EngineBuilder(M) .setEngineKind(ForceInterpreter ? EngineKind::Interpreter : EngineKind::JIT) .setErrorStr(ErrorStr) .setOptLevel(OptLevel) - .setAllocateGVsWithCode(GVsWithCode) - .create(); + .setAllocateGVsWithCode(GVsWithCode); + + return EB.create(); } /// createJIT - This is the factory method for creating a JIT for the current @@ -430,21 +431,25 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, // Use the defaults for extra parameters. Users can use EngineBuilder to // set them. 
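// (A hedged sketch of the rewrite that follows: the long static
// selectTarget() call is replaced by an EngineBuilder configured field by
// field, so unspecified parameters such as MArch/MCPU/MAttrs fall back to
// EngineBuilder's own defaults. Usage, taken from the added lines below:
//
//   EngineBuilder EB(M);
//   EB.setEngineKind(EngineKind::JIT);
//   EB.setErrorStr(ErrorStr);
//   TargetMachine *TM = EB.selectTarget();
// )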
- StringRef MArch = "";
- StringRef MCPU = "";
- SmallVector<std::string, 1> MAttrs;
+ EngineBuilder EB(M);
+ EB.setEngineKind(EngineKind::JIT);
+ EB.setErrorStr(ErrorStr);
+ EB.setRelocationModel(RM);
+ EB.setCodeModel(CMM);
+ EB.setAllocateGVsWithCode(GVsWithCode);
+ EB.setOptLevel(OL);
+ EB.setJITMemoryManager(JMM);
- Triple TT(M->getTargetTriple());
// TODO: permit custom TargetOptions here
- TargetMachine *TM =
- EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs, TargetOptions(), RM,
- CMM, OL, ErrorStr);
+ TargetMachine *TM = EB.selectTarget();
if (!TM || (ErrorStr && ErrorStr->length() > 0))
return 0;
return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
-ExecutionEngine *EngineBuilder::create() {
+ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
+ OwningPtr<TargetMachine> TheTM(TM); // Take ownership.
+
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -465,29 +470,24 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
- if (WhichEngine & EngineKind::JIT) {
+ if ((WhichEngine & EngineKind::JIT) && TheTM) {
Triple TT(M->getTargetTriple());
- if (TargetMachine *TM = EngineBuilder::selectTarget(TT, MArch, MCPU, MAttrs,
- Options,
- RelocModel, CMModel,
- OptLevel, ErrorStr)) {
- if (!TM->getTarget().hasJIT()) {
- errs() << "WARNING: This target JIT is not designed for the host"
- << " you are running. If bad things happen, please choose"
- << " a different -march switch.\n";
- }
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
- if (UseMCJIT && ExecutionEngine::MCJITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- } else if (ExecutionEngine::JITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- }
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
}
}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 16b8ee2..a942299 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -269,7 +269,8 @@ extern "C" {
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
JITMemoryManager *jmm, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji), JMM(jmm),
+ : ExecutionEngine(M), TM(tm), TJI(tji),
+ JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()),
AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
@@ -323,6 +324,7 @@ JIT::~JIT() {
AllJits->Remove(this);
delete jitstate;
delete JCE;
+ // JMM is owned by JCE, so we do not need to delete JMM here.
delete &TM; } diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index c557981..2ae155b 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -118,7 +118,7 @@ public: const std::vector<GenericValue> &ArgValues); /// getPointerToNamedFunction - This method returns the address of the - /// specified function by using the dlsym function call. As such it is only + /// specified function by using the MemoryManager. As such it is only /// useful for resolving library symbols, not code generated symbols. /// /// If AbortOnFailure is false and no function with the given name is diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index d404d0c..2d1775c 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -23,10 +23,22 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Config/config.h" #include <vector> #include <cassert> #include <climits> #include <cstring> + +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include <sys/stat.h> +#endif +#include <fcntl.h> +#include <unistd.h> +#endif + using namespace llvm; STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT"); @@ -315,13 +327,7 @@ namespace { static const size_t DefaultSizeThreshold; /// getPointerToNamedFunction - This method returns the address of the - /// specified function by using the dlsym function call. As such it is only - /// useful for resolving library symbols, not code generated symbols. - /// - /// If AbortOnFailure is false and no function with the given name is - /// found, this function silently returns a null pointer. Otherwise, - /// it prints a message to stderr and aborts. - /// + /// specified function by using the dlsym function call. virtual void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true); @@ -771,9 +777,6 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { //===----------------------------------------------------------------------===// // getPointerToNamedFunction() implementation. //===----------------------------------------------------------------------===// -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" // AtExitHandlers - List of functions to call when the program exits, // registered with the atexit() library function. @@ -793,8 +796,7 @@ static void runAtExitHandlers() { //===----------------------------------------------------------------------===// // Function stubs that are invoked instead of certain library calls -//===----------------------------------------------------------------------===// - +// // Force the following functions to be linked in to anything that uses the // JIT. This is a hack designed to work around the all-too-clever Glibc // strategy of making these functions work differently when inlined vs. when @@ -802,11 +804,6 @@ static void runAtExitHandlers() { // that the dynamic linker can't see. For more info, search for // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. #if defined(__linux__) -#if defined(HAVE_SYS_STAT_H) -#include <sys/stat.h> -#endif -#include <fcntl.h> -#include <unistd.h> /* stat functions are redirecting to __xstat with a version number. 
On x86-64 * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index cbb23d3..44f89cf 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -74,9 +74,9 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, OS.flush(); // Load the object into the dynamic linker. - // FIXME: It would be nice to avoid making yet another copy. - MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(), - Buffer.size())); + MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), + Buffer.size()), + "", false); if (Dyld.loadObject(MB)) report_fatal_error(Dyld.getErrorString()); // Resolve any relocations. @@ -218,7 +218,7 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure){ - if (!isSymbolSearchingDisabled()) { + if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); if (ptr) return ptr; diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index dac8b26..a68949a 100644 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -27,7 +27,8 @@ class MCJITMemoryManager : public RTDyldMemoryManager { // FIXME: Multiple modules. Module *M; public: - MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {} + MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : + JMM(jmm?jmm:JITMemoryManager::CreateDefaultMemManager()), M(m) {} // We own the JMM, so make sure to delete it. ~MCJITMemoryManager() { delete JMM; } diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index 002e63c..cbf7cf1 100644 --- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMRuntimeDyld + GDBRegistrar.cpp RuntimeDyld.cpp - RuntimeDyldMachO.cpp RuntimeDyldELF.cpp + RuntimeDyldMachO.cpp ) diff --git a/lib/ExecutionEngine/RuntimeDyld/DyldELFObject.h b/lib/ExecutionEngine/RuntimeDyld/DyldELFObject.h deleted file mode 100644 index 2d777da..0000000 --- a/lib/ExecutionEngine/RuntimeDyld/DyldELFObject.h +++ /dev/null @@ -1,388 +0,0 @@ -//===-- DyldELFObject.h - Dynamically loaded ELF object ----0---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Dynamically loaded ELF object class, a subclass of ELFObjectFile. Used -// to represent a loadable ELF image. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_RUNTIMEDYLD_DYLDELFOBJECT_H -#define LLVM_RUNTIMEDYLD_DYLDELFOBJECT_H - -#include "llvm/Object/ELF.h" - - -namespace llvm { - -using support::endianness; -using namespace llvm::object; - -template<support::endianness target_endianness, bool is64Bits> -class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) - - typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr; - typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym; - typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel; - typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela; - - typedef typename ELFObjectFile<target_endianness, is64Bits>:: - Elf_Ehdr Elf_Ehdr; - Elf_Ehdr *Header; - - // Update section headers according to the current location in memory - virtual void rebaseObject(std::vector<uint8_t*> *MemoryMap); - // Record memory addresses for cleanup - virtual void saveAddress(std::vector<uint8_t*> *MemoryMap, uint8_t *addr); - -protected: - virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; - -public: - DyldELFObject(MemoryBuffer *Object, std::vector<uint8_t*> *MemoryMap, - error_code &ec); - - // Methods for type inquiry through isa, cast, and dyn_cast - static inline bool classof(const Binary *v) { - return (isa<ELFObjectFile<target_endianness, is64Bits> >(v) - && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v))); - } - static inline bool classof( - const ELFObjectFile<target_endianness, is64Bits> *v) { - return v->isDyldType(); - } - static inline bool classof(const DyldELFObject *v) { - return true; - } -}; - -template<support::endianness target_endianness, bool is64Bits> -DyldELFObject<target_endianness, is64Bits>::DyldELFObject(MemoryBuffer *Object, - std::vector<uint8_t*> *MemoryMap, error_code &ec) - : ELFObjectFile<target_endianness, is64Bits>(Object, ec) - , Header(0) { - this->isDyldELFObject = true; - Header = const_cast<Elf_Ehdr *>( - reinterpret_cast<const Elf_Ehdr *>(this->base())); - if (Header->e_shoff == 0) - return; - - // Mark the image as a dynamic shared library - Header->e_type = ELF::ET_DYN; - - rebaseObject(MemoryMap); -} - -// Walk through the ELF headers, updating virtual addresses to reflect where -// the object is currently loaded in memory -template<support::endianness target_endianness, bool is64Bits> -void DyldELFObject<target_endianness, is64Bits>::rebaseObject( - std::vector<uint8_t*> *MemoryMap) { - typedef typename ELFDataTypeTypedefHelper< - target_endianness, is64Bits>::value_type addr_type; - - uint8_t *base_p = const_cast<uint8_t *>(this->base()); - Elf_Shdr *sectionTable = - reinterpret_cast<Elf_Shdr *>(base_p + Header->e_shoff); - uint64_t numSections = this->getNumSections(); - - // Allocate memory space for NOBITS sections (such as .bss), which only exist - // in memory, but don't occupy space in the object file. - // Update the address in the section headers to reflect this allocation. 
- for (uint64_t index = 0; index < numSections; index++) { - Elf_Shdr *sec = reinterpret_cast<Elf_Shdr *>( - reinterpret_cast<char *>(sectionTable) + index * Header->e_shentsize); - - // Only update sections that are meant to be present in program memory - if (sec->sh_flags & ELF::SHF_ALLOC) { - uint8_t *addr = base_p + sec->sh_offset; - if (sec->sh_type == ELF::SHT_NOBITS) { - addr = static_cast<uint8_t *>(calloc(sec->sh_size, 1)); - saveAddress(MemoryMap, addr); - } - else { - // FIXME: Currently memory with RWX permissions is allocated. In the - // future, make sure that permissions are as necessary - if (sec->sh_flags & ELF::SHF_WRITE) { - // see FIXME above - } - if (sec->sh_flags & ELF::SHF_EXECINSTR) { - // see FIXME above - } - } - assert(sizeof(addr_type) == sizeof(intptr_t) && - "Cross-architecture ELF dy-load is not supported!"); - sec->sh_addr = static_cast<addr_type>(intptr_t(addr)); - } - } - - // Now allocate actual space for COMMON symbols, which also don't occupy - // space in the object file. - // We want to allocate space for all COMMON symbols at once, so the flow is: - // 1. Go over all symbols, find those that are in COMMON. For each such - // symbol, record its size and the value field in its symbol header in a - // special vector. - // 2. Allocate memory for all COMMON symbols in one fell swoop. - // 3. Using the recorded information from (1), update the address fields in - // the symbol headers of the COMMON symbols to reflect their allocated - // address. - uint64_t TotalSize = 0; - std::vector<std::pair<Elf_Addr *, uint64_t> > SymbAddrInfo; - error_code ec = object_error::success; - for (symbol_iterator si = this->begin_symbols(), - se = this->end_symbols(); si != se; si.increment(ec)) { - uint64_t Size = 0; - ec = si->getSize(Size); - Elf_Sym* symb = const_cast<Elf_Sym*>( - this->getSymbol(si->getRawDataRefImpl())); - if (ec == object_error::success && - this->getSymbolTableIndex(symb) == ELF::SHN_COMMON && Size > 0) { - SymbAddrInfo.push_back(std::make_pair(&(symb->st_value), Size)); - TotalSize += Size; - } - } - - uint8_t* SectionPtr = (uint8_t *)calloc(TotalSize, 1); - saveAddress(MemoryMap, SectionPtr); - - typedef typename std::vector<std::pair<Elf_Addr *, uint64_t> >::iterator - AddrInfoIterator; - AddrInfoIterator EndIter = SymbAddrInfo.end(); - for (AddrInfoIterator AddrIter = SymbAddrInfo.begin(); - AddrIter != EndIter; ++AddrIter) { - assert(sizeof(addr_type) == sizeof(intptr_t) && - "Cross-architecture ELF dy-load is not supported!"); - *(AddrIter->first) = static_cast<addr_type>(intptr_t(SectionPtr)); - SectionPtr += AddrIter->second; - } -} - -// Record memory addresses for callers -template<support::endianness target_endianness, bool is64Bits> -void DyldELFObject<target_endianness, is64Bits>::saveAddress( - std::vector<uint8_t*> *MemoryMap, uint8_t* addr) { - if (MemoryMap) - MemoryMap->push_back(addr); - else - errs() << "WARNING: Memory leak - cannot record memory for ELF dyld."; -} - -template<support::endianness target_endianness, bool is64Bits> -error_code DyldELFObject<target_endianness, is64Bits>::getSymbolAddress( - DataRefImpl Symb, uint64_t &Result) const { - this->validateSymbol(Symb); - const Elf_Sym *symb = this->getSymbol(Symb); - if (this->getSymbolTableIndex(symb) == ELF::SHN_COMMON) { - Result = symb->st_value; - return object_error::success; - } - else { - return ELFObjectFile<target_endianness, is64Bits>::getSymbolAddress( - Symb, Result); - } -} - -} - -#endif - -//===-- DyldELFObject.h - Dynamically loaded ELF object 
----0---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Dynamically loaded ELF object class, a subclass of ELFObjectFile. Used -// to represent a loadable ELF image. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_RUNTIMEDYLD_DYLDELFOBJECT_H -#define LLVM_RUNTIMEDYLD_DYLDELFOBJECT_H - -#include "llvm/Object/ELF.h" - - -namespace llvm { - -using support::endianness; -using namespace llvm::object; - -template<support::endianness target_endianness, bool is64Bits> -class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) - - typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr; - typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym; - typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel; - typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela; - - typedef typename ELFObjectFile<target_endianness, is64Bits>:: - Elf_Ehdr Elf_Ehdr; - Elf_Ehdr *Header; - - // Update section headers according to the current location in memory - virtual void rebaseObject(std::vector<uint8_t*> *MemoryMap); - // Record memory addresses for cleanup - virtual void saveAddress(std::vector<uint8_t*> *MemoryMap, uint8_t *addr); - -protected: - virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const; - -public: - DyldELFObject(MemoryBuffer *Object, std::vector<uint8_t*> *MemoryMap, - error_code &ec); - - // Methods for type inquiry through isa, cast, and dyn_cast - static inline bool classof(const Binary *v) { - return (isa<ELFObjectFile<target_endianness, is64Bits> >(v) - && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v))); - } - static inline bool classof( - const ELFObjectFile<target_endianness, is64Bits> *v) { - return v->isDyldType(); - } - static inline bool classof(const DyldELFObject *v) { - return true; - } -}; - -template<support::endianness target_endianness, bool is64Bits> -DyldELFObject<target_endianness, is64Bits>::DyldELFObject(MemoryBuffer *Object, - std::vector<uint8_t*> *MemoryMap, error_code &ec) - : ELFObjectFile<target_endianness, is64Bits>(Object, ec) - , Header(0) { - this->isDyldELFObject = true; - Header = const_cast<Elf_Ehdr *>( - reinterpret_cast<const Elf_Ehdr *>(this->base())); - if (Header->e_shoff == 0) - return; - - // Mark the image as a dynamic shared library - Header->e_type = ELF::ET_DYN; - - rebaseObject(MemoryMap); -} - -// Walk through the ELF headers, updating virtual addresses to reflect where -// the object is currently loaded in memory -template<support::endianness target_endianness, bool is64Bits> -void DyldELFObject<target_endianness, is64Bits>::rebaseObject( - std::vector<uint8_t*> *MemoryMap) { - typedef typename ELFDataTypeTypedefHelper< - target_endianness, is64Bits>::value_type addr_type; - - uint8_t *base_p = const_cast<uint8_t *>(this->base()); - Elf_Shdr *sectionTable = - reinterpret_cast<Elf_Shdr *>(base_p + Header->e_shoff); - uint64_t numSections = this->getNumSections(); - - // Allocate memory space for NOBITS sections (such as .bss), which only exist - // in memory, but don't occupy space in the object file. - // Update the address in the section headers to reflect this allocation. 
- for (uint64_t index = 0; index < numSections; index++) { - Elf_Shdr *sec = reinterpret_cast<Elf_Shdr *>( - reinterpret_cast<char *>(sectionTable) + index * Header->e_shentsize); - - // Only update sections that are meant to be present in program memory - if (sec->sh_flags & ELF::SHF_ALLOC) { - uint8_t *addr = base_p + sec->sh_offset; - if (sec->sh_type == ELF::SHT_NOBITS) { - addr = static_cast<uint8_t *>(calloc(sec->sh_size, 1)); - saveAddress(MemoryMap, addr); - } - else { - // FIXME: Currently memory with RWX permissions is allocated. In the - // future, make sure that permissions are as necessary - if (sec->sh_flags & ELF::SHF_WRITE) { - // see FIXME above - } - if (sec->sh_flags & ELF::SHF_EXECINSTR) { - // see FIXME above - } - } - assert(sizeof(addr_type) == sizeof(intptr_t) && - "Cross-architecture ELF dy-load is not supported!"); - sec->sh_addr = static_cast<addr_type>(intptr_t(addr)); - } - } - - // Now allocate actual space for COMMON symbols, which also don't occupy - // space in the object file. - // We want to allocate space for all COMMON symbols at once, so the flow is: - // 1. Go over all symbols, find those that are in COMMON. For each such - // symbol, record its size and the value field in its symbol header in a - // special vector. - // 2. Allocate memory for all COMMON symbols in one fell swoop. - // 3. Using the recorded information from (1), update the address fields in - // the symbol headers of the COMMON symbols to reflect their allocated - // address. - uint64_t TotalSize = 0; - std::vector<std::pair<Elf_Addr *, uint64_t> > SymbAddrInfo; - error_code ec = object_error::success; - for (symbol_iterator si = this->begin_symbols(), - se = this->end_symbols(); si != se; si.increment(ec)) { - uint64_t Size = 0; - ec = si->getSize(Size); - Elf_Sym* symb = const_cast<Elf_Sym*>( - this->getSymbol(si->getRawDataRefImpl())); - if (ec == object_error::success && - this->getSymbolTableIndex(symb) == ELF::SHN_COMMON && Size > 0) { - SymbAddrInfo.push_back(std::make_pair(&(symb->st_value), Size)); - TotalSize += Size; - } - } - - uint8_t* SectionPtr = (uint8_t *)calloc(TotalSize, 1); - saveAddress(MemoryMap, SectionPtr); - - typedef typename std::vector<std::pair<Elf_Addr *, uint64_t> >::iterator - AddrInfoIterator; - AddrInfoIterator EndIter = SymbAddrInfo.end(); - for (AddrInfoIterator AddrIter = SymbAddrInfo.begin(); - AddrIter != EndIter; ++AddrIter) { - assert(sizeof(addr_type) == sizeof(intptr_t) && - "Cross-architecture ELF dy-load is not supported!"); - *(AddrIter->first) = static_cast<addr_type>(intptr_t(SectionPtr)); - SectionPtr += AddrIter->second; - } -} - -// Record memory addresses for callers -template<support::endianness target_endianness, bool is64Bits> -void DyldELFObject<target_endianness, is64Bits>::saveAddress( - std::vector<uint8_t*> *MemoryMap, uint8_t* addr) { - if (MemoryMap) - MemoryMap->push_back(addr); - else - errs() << "WARNING: Memory leak - cannot record memory for ELF dyld."; -} - -template<support::endianness target_endianness, bool is64Bits> -error_code DyldELFObject<target_endianness, is64Bits>::getSymbolAddress( - DataRefImpl Symb, uint64_t &Result) const { - this->validateSymbol(Symb); - const Elf_Sym *symb = this->getSymbol(Symb); - if (this->getSymbolTableIndex(symb) == ELF::SHN_COMMON) { - Result = symb->st_value; - return object_error::success; - } - else { - return ELFObjectFile<target_endianness, is64Bits>::getSymbolAddress( - Symb, Result); - } -} - -} - -#endif - diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp 
b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp new file mode 100644 index 0000000..8b50101 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp @@ -0,0 +1,214 @@ +//===-- GDBRegistrar.cpp - Registers objects with GDB ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "JITRegistrar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Compiler.h" + +using namespace llvm; + +// This must be kept in sync with gdb/gdb/jit.h . +extern "C" { + + typedef enum { + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN + } jit_actions_t; + + struct jit_code_entry { + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; + }; + + struct jit_descriptor { + uint32_t version; + // This should be jit_actions_t, but we want to be specific about the + // bit-width. + uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; + }; + + // We put information about the JITed function in this global, which the + // debugger reads. Make sure to specify the version statically, because the + // debugger checks the version before we can set it during runtime. + static struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; + + // Debuggers puts a breakpoint in this function. + LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { } + +} + +namespace { + +// Buffer for an in-memory object file in executable memory +typedef llvm::DenseMap< const char*, + std::pair<std::size_t, jit_code_entry*> > + RegisteredObjectBufferMap; + +/// Global access point for the JIT debugging interface designed for use with a +/// singleton toolbox. Handles thread-safe registration and deregistration of +/// object files that are in executable memory managed by the client of this +/// class. +class GDBJITRegistrar : public JITRegistrar { + /// A map of in-memory object files that have been registered with the + /// JIT interface. + RegisteredObjectBufferMap ObjectBufferMap; + +public: + /// Instantiates the JIT service. + GDBJITRegistrar() : ObjectBufferMap() {} + + /// Unregisters each object that was previously registered and releases all + /// internal resources. + virtual ~GDBJITRegistrar(); + + /// Creates an entry in the JIT registry for the buffer @p Object, + /// which must contain an object file in executable memory with any + /// debug information for the debugger. + void registerObject(const MemoryBuffer &Object); + + /// Removes the internal registration of @p Object, and + /// frees associated resources. + /// Returns true if @p Object was found in ObjectBufferMap. + bool deregisterObject(const MemoryBuffer &Object); + +private: + /// Deregister the debug info for the given object file from the debugger + /// and delete any temporary copies. This private method does not remove + /// the function from Map so that it can be called while iterating over Map. + void deregisterObjectInternal(RegisteredObjectBufferMap::iterator I); +}; + +/// Lock used to serialize all jit registration events, since they +/// modify global variables. +llvm::sys::Mutex JITDebugLock; + +/// Acquire the lock and do the registration. 
+void NotifyDebugger(jit_code_entry* JITCodeEntry) { + llvm::MutexGuard locked(JITDebugLock); + __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + + // Insert this entry at the head of the list. + JITCodeEntry->prev_entry = NULL; + jit_code_entry* NextEntry = __jit_debug_descriptor.first_entry; + JITCodeEntry->next_entry = NextEntry; + if (NextEntry != NULL) { + NextEntry->prev_entry = JITCodeEntry; + } + __jit_debug_descriptor.first_entry = JITCodeEntry; + __jit_debug_descriptor.relevant_entry = JITCodeEntry; + __jit_debug_register_code(); +} + +GDBJITRegistrar::~GDBJITRegistrar() { + // Free all registered object files. + for (RegisteredObjectBufferMap::iterator I = ObjectBufferMap.begin(), E = ObjectBufferMap.end(); + I != E; ++I) { + // Call the private method that doesn't update the map so our iterator + // doesn't break. + deregisterObjectInternal(I); + } + ObjectBufferMap.clear(); +} + +void GDBJITRegistrar::registerObject(const MemoryBuffer &Object) { + + const char *Buffer = Object.getBufferStart(); + size_t Size = Object.getBufferSize(); + + assert(Buffer && "Attempt to register a null object with a debugger."); + assert(ObjectBufferMap.find(Buffer) == ObjectBufferMap.end() && + "Second attempt to perform debug registration."); + jit_code_entry* JITCodeEntry = new jit_code_entry(); + + if (JITCodeEntry == 0) { + llvm::report_fatal_error( + "Allocation failed when registering a JIT entry!\n"); + } + else { + JITCodeEntry->symfile_addr = Buffer; + JITCodeEntry->symfile_size = Size; + + ObjectBufferMap[Buffer] = std::make_pair(Size, JITCodeEntry); + NotifyDebugger(JITCodeEntry); + } +} + +bool GDBJITRegistrar::deregisterObject(const MemoryBuffer& Object) { + const char *Buffer = Object.getBufferStart(); + RegisteredObjectBufferMap::iterator I = ObjectBufferMap.find(Buffer); + + if (I != ObjectBufferMap.end()) { + deregisterObjectInternal(I); + ObjectBufferMap.erase(I); + return true; + } + return false; +} + +void GDBJITRegistrar::deregisterObjectInternal( + RegisteredObjectBufferMap::iterator I) { + + jit_code_entry*& JITCodeEntry = I->second.second; + + // Acquire the lock and do the unregistration. + { + llvm::MutexGuard locked(JITDebugLock); + __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN; + + // Remove the jit_code_entry from the linked list. + jit_code_entry* PrevEntry = JITCodeEntry->prev_entry; + jit_code_entry* NextEntry = JITCodeEntry->next_entry; + + if (NextEntry) { + NextEntry->prev_entry = PrevEntry; + } + if (PrevEntry) { + PrevEntry->next_entry = NextEntry; + } + else { + assert(__jit_debug_descriptor.first_entry == JITCodeEntry); + __jit_debug_descriptor.first_entry = NextEntry; + } + + // Tell the debugger which entry we removed, and unregister the code. 
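// (Unregistration mirrors NotifyDebugger() above; the GDB JIT interface is
// always the same three steps: set action_flag, point relevant_entry at the
// affected jit_code_entry, then call the empty hook the debugger has set a
// breakpoint on:
//
//   __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN; // done above
//   __jit_debug_descriptor.relevant_entry = JITCodeEntry;
//   __jit_debug_register_code();
//
// GDB stops inside the hook and re-reads __jit_debug_descriptor.)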
+ __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+ __jit_debug_register_code();
+ }
+
+ delete JITCodeEntry;
+ JITCodeEntry = NULL;
+}
+
+} // end namespace
+
+namespace llvm {
+
+JITRegistrar& JITRegistrar::getGDBRegistrar() {
+ static GDBJITRegistrar* sRegistrar = NULL;
+ if (sRegistrar == NULL) {
+ // The mutex is here so that it won't slow down access once the registrar
+ // is instantiated
+ llvm::MutexGuard locked(JITDebugLock);
+ // Check again to be sure another thread didn't create this while we waited
+ if (sRegistrar == NULL) {
+ sRegistrar = new GDBJITRegistrar;
+ }
+ }
+ return *sRegistrar;
+}
+
+} // namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
new file mode 100644
index 0000000..f964bc6
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h
@@ -0,0 +1,43 @@
+//===-- JITRegistrar.h - Registers objects with a debugger ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+#define LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
+
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+/// Global access point for the JIT debugging interface.
+class JITRegistrar {
+public:
+ /// Instantiates the JIT service.
+ JITRegistrar() {}
+
+ /// Unregisters each object that was previously registered and releases all
+ /// internal resources.
+ virtual ~JITRegistrar() {}
+
+ /// Creates an entry in the JIT registry for the buffer @p Object,
+ /// which must contain an object file in executable memory with any
+ /// debug information for the debugger.
+ virtual void registerObject(const MemoryBuffer &Object) = 0;
+
+ /// Removes the internal registration of @p Object, and
+ /// frees associated resources.
+ /// Returns true if @p Object was previously registered.
+ virtual bool deregisterObject(const MemoryBuffer &Object) = 0;
+
+ /// Returns a reference to a GDB JIT registrar singleton
+ static JITRegistrar& getGDBRegistrar();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_REGISTRAR_H
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
new file mode 100644
index 0000000..8206ead
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImage.h
@@ -0,0 +1,59 @@
+//===---- ObjectImage.h - Format independent executable object image -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a file format independent ObjectImage class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
+#define LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
+
+#include "llvm/Object/ObjectFile.h"
+
+namespace llvm {
+
+class ObjectImage {
+ ObjectImage(); // = delete
+ ObjectImage(const ObjectImage &other); // = delete
+protected:
+ object::ObjectFile *ObjFile;
+
+public:
+ ObjectImage(object::ObjectFile *Obj) { ObjFile = Obj; }
+ virtual ~ObjectImage() {}
+
+ virtual object::symbol_iterator begin_symbols() const
+ { return ObjFile->begin_symbols(); }
+ virtual object::symbol_iterator end_symbols() const
+ { return ObjFile->end_symbols(); }
+
+ virtual object::section_iterator begin_sections() const
+ { return ObjFile->begin_sections(); }
+ virtual object::section_iterator end_sections() const
+ { return ObjFile->end_sections(); }
+
+ virtual /* Triple::ArchType */ unsigned getArch() const
+ { return ObjFile->getArch(); }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const object::SectionRef &Sec,
+ uint64_t Addr) {}
+ virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr)
+ {}
+
+ // Subclasses can override this method to provide JIT debugging support
+ virtual void registerWithDebugger() {}
+ virtual void deregisterWithDebugger() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H
+
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index ff4a2c8..1b1840a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -26,11 +26,19 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
-
+namespace {
+ // Helper for extensive error checking in debug builds.
+ error_code Check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+ }
+} // end anonymous namespace
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
- // First, resolve relocations assotiated with external symbols.
+ // First, resolve relocations associated with external symbols.
resolveSymbols();
// Just iterate over the sections we have and resolve all the relocations
@@ -51,90 +59,110 @@ void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
llvm_unreachable("Attempting to remap address of unknown section!");
}
+// Subclasses can implement this method to create specialized image instances.
+// The caller owns the pointer that is returned.
+ObjectImage *RuntimeDyldImpl::createObjectImage(const MemoryBuffer *InputBuffer) {
+ ObjectFile *ObjFile = ObjectFile::createObjectFile(const_cast<MemoryBuffer*>
+ (InputBuffer));
+ ObjectImage *Obj = new ObjectImage(ObjFile);
+ return Obj;
+}
+
bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
- // FIXME: ObjectFile don't modify MemoryBuffer.
- // It should use const MemoryBuffer as parameter.
- ObjectFile *obj = ObjectFile::
- createObjectFile(const_cast<MemoryBuffer*>(InputBuffer));
+ OwningPtr<ObjectImage> obj(createObjectImage(InputBuffer));
+ if (!obj)
+ report_fatal_error("Unable to create object image from memory buffer!");
Arch = (Triple::ArchType)obj->getArch();
LocalSymbolMap LocalSymbols; // Functions and data symbols from the
// object file.
ObjSectionToIDMap LocalSections; // Used sections from the object file + CommonSymbolMap CommonSymbols; // Common symbols requiring allocation + uint64_t CommonSize = 0; error_code err; - - // Parse symbols DEBUG(dbgs() << "Parse symbols:\n"); - for (symbol_iterator it = obj->begin_symbols(), itEnd = obj->end_symbols(); - it != itEnd; it.increment(err)) { - if (err) break; + for (symbol_iterator i = obj->begin_symbols(), e = obj->end_symbols(); + i != e; i.increment(err)) { + Check(err); object::SymbolRef::Type SymType; StringRef Name; - if ((bool)(err = it->getType(SymType))) break; - if ((bool)(err = it->getName(Name))) break; - - if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data) { - uint64_t FileOffset; - uint32_t flags; - StringRef sData; - section_iterator sIt = obj->end_sections(); - if ((bool)(err = it->getFileOffset(FileOffset))) break; - if ((bool)(err = it->getFlags(flags))) break; - if ((bool)(err = it->getSection(sIt))) break; - if (sIt == obj->end_sections()) continue; - if ((bool)(err = sIt->getContents(sData))) break; - const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + - (uintptr_t)FileOffset; - uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin()); - unsigned SectionID = - findOrEmitSection(*sIt, - SymType == object::SymbolRef::ST_Function, - LocalSections); - bool isGlobal = flags & SymbolRef::SF_Global; - LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); - DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) - << " flags: " << flags - << " SID: " << SectionID - << " Offset: " << format("%p", SectOffset)); - if (isGlobal) - SymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + Check(i->getType(SymType)); + Check(i->getName(Name)); + + uint32_t flags; + Check(i->getFlags(flags)); + + bool isCommon = flags & SymbolRef::SF_Common; + if (isCommon) { + // Add the common symbols to a list. We'll allocate them all below. 
+ uint64_t Size = 0; + Check(i->getSize(Size)); + CommonSize += Size; + CommonSymbols[*i] = Size; + } else { + if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data) { + uint64_t FileOffset; + StringRef sData; + section_iterator si = obj->end_sections(); + Check(i->getFileOffset(FileOffset)); + Check(i->getSection(si)); + if (si == obj->end_sections()) continue; + Check(si->getContents(sData)); + const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + + (uintptr_t)FileOffset; + uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin()); + unsigned SectionID = + findOrEmitSection(*obj, + *si, + SymType == object::SymbolRef::ST_Function, + LocalSections); + bool isGlobal = flags & SymbolRef::SF_Global; + LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); + DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) + << " flags: " << flags + << " SID: " << SectionID + << " Offset: " << format("%p", SectOffset)); + if (isGlobal) + SymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + } } DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n"); } - if (err) { - report_fatal_error(err.message()); - } + + // Allocate common symbols + if (CommonSize != 0) + emitCommonSymbols(*obj, CommonSymbols, CommonSize, LocalSymbols); // Parse and process relocations DEBUG(dbgs() << "Parse relocations:\n"); - for (section_iterator sIt = obj->begin_sections(), - sItEnd = obj->end_sections(); sIt != sItEnd; sIt.increment(err)) { - if (err) break; + for (section_iterator si = obj->begin_sections(), + se = obj->end_sections(); si != se; si.increment(err)) { + Check(err); bool isFirstRelocation = true; unsigned SectionID = 0; StubMap Stubs; - for (relocation_iterator it = sIt->begin_relocations(), - itEnd = sIt->end_relocations(); it != itEnd; it.increment(err)) { - if (err) break; + for (relocation_iterator i = si->begin_relocations(), + e = si->end_relocations(); i != e; i.increment(err)) { + Check(err); // If it's the first relocation in this section, find its SectionID if (isFirstRelocation) { - SectionID = findOrEmitSection(*sIt, true, LocalSections); + SectionID = findOrEmitSection(*obj, *si, true, LocalSections); DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); isFirstRelocation = false; } ObjRelocationInfo RI; RI.SectionID = SectionID; - if ((bool)(err = it->getAdditionalInfo(RI.AdditionalInfo))) break; - if ((bool)(err = it->getOffset(RI.Offset))) break; - if ((bool)(err = it->getSymbol(RI.Symbol))) break; - if ((bool)(err = it->getType(RI.Type))) break; + Check(i->getAdditionalInfo(RI.AdditionalInfo)); + Check(i->getOffset(RI.Offset)); + Check(i->getSymbol(RI.Symbol)); + Check(i->getType(RI.Type)); DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo << " Offset: " << format("%p", (uintptr_t)RI.Offset) @@ -142,61 +170,138 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { << "\n"); processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs); } - if (err) { - report_fatal_error(err.message()); - } } + + handleObjectLoaded(obj.take()); + return false; } -unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section, +unsigned RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, + const CommonSymbolMap &Map, + uint64_t TotalSize, + LocalSymbolMap &LocalSymbols) { + // Allocate memory for the section + unsigned SectionID = Sections.size(); + uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*), + SectionID); + if (!Addr) + report_fatal_error("Unable to allocate memory for common symbols!"); + uint64_t Offset = 0; + Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0)); + memset(Addr, 0, TotalSize); + + DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID + << " new addr: " << format("%p", Addr) + << " DataSize: " << TotalSize + << "\n"); + + // Assign the address of each symbol + for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end(); + it != itEnd; it++) { + uint64_t Size = it->second; + StringRef Name; + it->first.getName(Name); + Obj.updateSymbolAddress(it->first, (uint64_t)Addr); + LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset); + Offset += Size; + Addr += Size; + } + + return SectionID; +} + +unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, + const SectionRef &Section, + bool IsCode) { unsigned StubBufSize = 0, StubSize = getMaxStubSize(); error_code err; if (StubSize > 0) { - for (relocation_iterator it = Section.begin_relocations(), - itEnd = Section.end_relocations(); it != itEnd; it.increment(err)) + for (relocation_iterator i = Section.begin_relocations(), - e = Section.end_relocations(); i != e; i.increment(err), Check(err)) StubBufSize += StubSize; } StringRef data; uint64_t Alignment64; - if ((bool)(err = Section.getContents(data))) report_fatal_error(err.message()); - if ((bool)(err = Section.getAlignment(Alignment64))) - report_fatal_error(err.message()); + Check(Section.getContents(data)); + Check(Section.getAlignment(Alignment64)); unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL; - unsigned DataSize = data.size(); - unsigned Allocate = DataSize + StubBufSize; + bool IsRequired; + bool IsVirtual; + bool IsZeroInit; + uint64_t DataSize; + Check(Section.isRequiredForExecution(IsRequired)); + Check(Section.isVirtual(IsVirtual)); + Check(Section.isZeroInit(IsZeroInit)); + Check(Section.getSize(DataSize)); + + unsigned Allocate; unsigned SectionID = Sections.size(); - const char *pData = data.data(); - uint8_t *Addr = IsCode - ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) - : MemMgr->allocateDataSection(Allocate, Alignment, SectionID); - - memcpy(Addr, pData, DataSize); - DEBUG(dbgs() << "emitSection SectionID: " << SectionID - << " obj addr: " << format("%p", pData) - << " new addr: " << format("%p", Addr) - << " DataSize: " << DataSize - << " StubBufSize: " << StubBufSize - << " Allocate: " << Allocate - << "\n"); + uint8_t *Addr; + const char *pData = 0; + + // Some sections, such as debug info, don't need to be loaded for execution. + // Leave those where they are. + if (IsRequired) { + Allocate = DataSize + StubBufSize; + Addr = IsCode + ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) + : MemMgr->allocateDataSection(Allocate, Alignment, SectionID); + if (!Addr) + report_fatal_error("Unable to allocate section memory!"); + + // Virtual sections have no data in the object image, so leave pData = 0 + if (!IsVirtual) + pData = data.data(); + + // Zero-initialize or copy the data from the image + if (IsZeroInit || IsVirtual) + memset(Addr, 0, DataSize); + else + memcpy(Addr, pData, DataSize); + + DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " obj addr: " << format("%p", pData) + << " new addr: " << format("%p", Addr) + << " DataSize: " << DataSize + << " StubBufSize: " << StubBufSize + << " Allocate: " << Allocate + << "\n"); + Obj.updateSectionAddress(Section, (uint64_t)Addr); + } + else { + // Even if we didn't load the section, we need to record an entry for it + // to handle later processing (and by 'handle' I mean don't do anything + // with these sections). + Allocate = 0; + Addr = 0; + DEBUG(dbgs() << "emitSection SectionID: " << SectionID + << " obj addr: " << format("%p", data.data()) + << " new addr: 0" + << " DataSize: " << DataSize + << " StubBufSize: " << StubBufSize + << " Allocate: " << Allocate + << "\n"); + } + Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData)); return SectionID; } -unsigned RuntimeDyldImpl:: -findOrEmitSection(const SectionRef &Section, bool IsCode, - ObjSectionToIDMap &LocalSections) { +unsigned RuntimeDyldImpl::findOrEmitSection(ObjectImage &Obj, + const SectionRef &Section, + bool IsCode, + ObjSectionToIDMap &LocalSections) { unsigned SectionID = 0; - ObjSectionToIDMap::iterator sIDIt = LocalSections.find(Section); - if (sIDIt != LocalSections.end()) - SectionID = sIDIt->second; + ObjSectionToIDMap::iterator i = LocalSections.find(Section); + if (i != LocalSections.end()) + SectionID = i->second; else { - SectionID = emitSection(Section, IsCode); + SectionID = emitSection(Obj, Section, IsCode); LocalSections[Section] = SectionID; } return SectionID; @@ -259,15 +364,18 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value) { - uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; - DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" << format("%p", Target) << ")" - << " Data: " << RE.Data - << " Addend: " << RE.Addend - << "\n"); - - resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, - Value, RE.Data, RE.Addend); + // Ignore relocations for sections that were not loaded + if (Sections[RE.SectionID].Address != 0) { + uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset; + DEBUG(dbgs() << "\tSectionID: " << RE.SectionID + << " + " << RE.Offset << " (" << format("%p", Target) << ")" + << " Data: " << RE.Data + << " Addend: " << RE.Addend + << "\n"); + + resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset, + Value, RE.Data, RE.Addend); + } } void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, @@ -280,11 +388,11 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, // resolveSymbols - Resolve any relocations to the specified symbols if // we know where they live.
void RuntimeDyldImpl::resolveSymbols() { - StringMap<RelocationList>::iterator it = SymbolRelocations.begin(), - itEnd = SymbolRelocations.end(); - for (; it != itEnd; it++) { - StringRef Name = it->first(); - RelocationList &Relocs = it->second; + StringMap<RelocationList>::iterator i = SymbolRelocations.begin(), + e = SymbolRelocations.end(); + for (; i != e; i++) { + StringRef Name = i->first(); + RelocationList &Relocs = i->second; StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name); if (Loc == SymbolTable.end()) { // This is an external symbol, try to get its address from diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 9351b6c..db6da8c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -20,11 +20,176 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/ELF.h" #include "llvm/ADT/Triple.h" +#include "llvm/Object/ELF.h" +#include "JITRegistrar.h" using namespace llvm; using namespace llvm::object; +namespace { + +template<support::endianness target_endianness, bool is64Bits> +class DyldELFObject : public ELFObjectFile<target_endianness, is64Bits> { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + + typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr; + typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym; + typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel; + typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela; + + typedef typename ELFObjectFile<target_endianness, is64Bits>:: Elf_Ehdr Elf_Ehdr; + + typedef typename ELFDataTypeTypedefHelper< target_endianness, is64Bits>::value_type addr_type; + +protected: + // This duplicates the 'Data' member in the 'Binary' base class + // but it is necessary to work around a bug in gcc 4.2 + MemoryBuffer *InputData; + +public: + DyldELFObject(MemoryBuffer *Object, error_code &ec); + + void updateSectionAddress(const SectionRef &Sec, uint64_t Addr); + void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr); + + const MemoryBuffer& getBuffer() const { return *InputData; } + + // Methods for type inquiry through isa, cast, and dyn_cast + static inline bool classof(const Binary *v) { + return (isa<ELFObjectFile<target_endianness, is64Bits> >(v) + && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v))); + } + static inline bool classof( + const ELFObjectFile<target_endianness, is64Bits> *v) { + return v->isDyldType(); + } + static inline bool classof(const DyldELFObject *v) { + return true; + } +}; + +template<support::endianness target_endianness, bool is64Bits> +class ELFObjectImage : public ObjectImage { + protected: + DyldELFObject<target_endianness, is64Bits> *DyldObj; + bool Registered; + + public: + ELFObjectImage(DyldELFObject<target_endianness, is64Bits> *Obj) + : ObjectImage(Obj), + DyldObj(Obj), + Registered(false) {} + + virtual ~ELFObjectImage() { + if (Registered) + deregisterWithDebugger(); + } + + // Subclasses can override these methods to update the image with loaded + // addresses for sections and common symbols + virtual void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) + { + DyldObj->updateSectionAddress(Sec, Addr); + } + + virtual void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) + { + DyldObj->updateSymbolAddress(Sym, Addr); + } + + virtual void registerWithDebugger() + { + JITRegistrar::getGDBRegistrar().registerObject(DyldObj->getBuffer()); + Registered = true; + } + virtual
void deregisterWithDebugger() + { + JITRegistrar::getGDBRegistrar().deregisterObject(DyldObj->getBuffer()); + } +}; + +template<support::endianness target_endianness, bool is64Bits> +DyldELFObject<target_endianness, is64Bits>::DyldELFObject(MemoryBuffer *Object, + error_code &ec) + : ELFObjectFile<target_endianness, is64Bits>(Object, ec), + InputData(Object) { + this->isDyldELFObject = true; +} + +template<support::endianness target_endianness, bool is64Bits> +void DyldELFObject<target_endianness, is64Bits>::updateSectionAddress( + const SectionRef &Sec, + uint64_t Addr) { + DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); + Elf_Shdr *shdr = const_cast<Elf_Shdr*>( + reinterpret_cast<const Elf_Shdr *>(ShdrRef.p)); + + // This assumes the address passed in matches the target address bitness + // The template-based type cast handles everything else. + shdr->sh_addr = static_cast<addr_type>(Addr); +} + +template<support::endianness target_endianness, bool is64Bits> +void DyldELFObject<target_endianness, is64Bits>::updateSymbolAddress( + const SymbolRef &SymRef, + uint64_t Addr) { + + Elf_Sym *sym = const_cast<Elf_Sym*>( + ELFObjectFile<target_endianness, is64Bits>:: + getSymbol(SymRef.getRawDataRefImpl())); + + // This assumes the address passed in matches the target address bitness + // The template-based type cast handles everything else. + sym->st_value = static_cast<addr_type>(Addr); +} + +} // namespace + + namespace llvm { +ObjectImage *RuntimeDyldELF::createObjectImage( + const MemoryBuffer *ConstInputBuffer) { + MemoryBuffer *InputBuffer = const_cast<MemoryBuffer*>(ConstInputBuffer); + std::pair<unsigned char, unsigned char> Ident = getElfArchType(InputBuffer); + error_code ec; + + if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) { + DyldELFObject<support::little, false> *Obj = + new DyldELFObject<support::little, false>(InputBuffer, ec); + return new ELFObjectImage<support::little, false>(Obj); + } + else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) { + DyldELFObject<support::big, false> *Obj = + new DyldELFObject<support::big, false>(InputBuffer, ec); + return new ELFObjectImage<support::big, false>(Obj); + } + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) { + DyldELFObject<support::big, true> *Obj = + new DyldELFObject<support::big, true>(InputBuffer, ec); + return new ELFObjectImage<support::big, true>(Obj); + } + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { + DyldELFObject<support::little, true> *Obj = + new DyldELFObject<support::little, true>(InputBuffer, ec); + return new ELFObjectImage<support::little, true>(Obj); + } + else + llvm_unreachable("Unexpected ELF format"); +} + +void RuntimeDyldELF::handleObjectLoaded(ObjectImage *Obj) +{ + Obj->registerWithDebugger(); + // Save the loaded object. 
It will deregister itself when deleted + LoadedObject = Obj; +} + +RuntimeDyldELF::~RuntimeDyldELF() { + if (LoadedObject) + delete LoadedObject; +} void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, @@ -71,7 +236,8 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, switch (Type) { case ELF::R_386_32: { uint32_t *Target = (uint32_t*)(LocalAddress); - *Target = Value + Addend; + uint32_t Placeholder = *Target; + *Target = Placeholder + Value + Addend; break; } case ELF::R_386_PC32: { @@ -165,10 +331,11 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, } } -void RuntimeDyldELF:: -processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, - ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, StubMap &Stubs) { +void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, + StubMap &Stubs) { uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL); intptr_t Addend = (intptr_t)Rel.AdditionalInfo; @@ -181,16 +348,16 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, << " TargetName: " << TargetName << "\n"); // First look the symbol in object file symbols. - LocalSymbolMap::iterator it = Symbols.find(TargetName.data()); - if (it != Symbols.end()) { - Value.SectionID = it->second.first; - Value.Addend = it->second.second; + LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data()); + if (lsi != Symbols.end()) { + Value.SectionID = lsi->second.first; + Value.Addend = lsi->second.second; } else { // Second look the symbol in global symbol table. - StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data()); - if (itS != SymbolTable.end()) { - Value.SectionID = itS->second.first; - Value.Addend = itS->second.second; + StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data()); + if (gsi != SymbolTable.end()) { + Value.SectionID = gsi->second.first; + Value.Addend = gsi->second.second; } else { SymbolRef::Type SymType; Symbol.getType(SymType); @@ -199,12 +366,12 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously // and can be changed by another developers. Maybe best way is add // a new symbol type ST_Section to SymbolRef and use it. - section_iterator sIt = Obj.end_sections(); - Symbol.getSection(sIt); - if (sIt == Obj.end_sections()) + section_iterator si = Obj.end_sections(); + Symbol.getSection(si); + if (si == Obj.end_sections()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); - Value.SectionID = findOrEmitSection((*sIt), true, ObjSectionToID); + Value.SectionID = findOrEmitSection(Obj, (*si), true, ObjSectionToID); Value.Addend = Addend; break; } @@ -232,10 +399,10 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, uint8_t *Target = Section.Address + Rel.Offset; // Look up for existing stub. - StubMap::const_iterator stubIt = Stubs.find(Value); - if (stubIt != Stubs.end()) { - resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address + - stubIt->second, RelType, 0); + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + + i->second, RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
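For reference: createObjectImage above picks one of four DyldELFObject instantiations from the first interesting bytes of the ELF identification header. A minimal standalone sketch of that classification, mirroring the getElfArchType helper this patch relies on; the name elfArchType and the raw-buffer signature are illustrative, not LLVM's API:

#include <cstddef>
#include <utility>

// Sketch: classify an ELF image by class (32- vs 64-bit) and data encoding
// (little- vs big-endian) from the e_ident bytes. The constants are the ELF
// spec values: EI_NIDENT = 16, EI_CLASS = 4, EI_DATA = 5.
static std::pair<unsigned char, unsigned char>
elfArchType(const char *Buf, std::size_t Size) {
  if (Size < 16)                                 // shorter than e_ident
    return std::make_pair((unsigned char)0,      // ELFCLASSNONE
                          (unsigned char)0);     // ELFDATANONE
  return std::make_pair((unsigned char)Buf[4],   // EI_CLASS: 1 = 32-bit, 2 = 64-bit
                        (unsigned char)Buf[5]);  // EI_DATA:  1 = LSB, 2 = MSB
}

A (class, data) result of (1, 1) selects DyldELFObject<support::little, false>, (2, 2) selects DyldELFObject<support::big, true>, and so on; any other combination hits the llvm_unreachable above.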
@@ -245,7 +412,7 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, Section.StubOffset); AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address, ELF::R_ARM_ABS32); - resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address + + resolveRelocation(Target, (uint64_t)Target, (uint64_t)Section.Address + Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 36566da..e7f6fab 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -22,6 +22,8 @@ using namespace llvm; namespace llvm { class RuntimeDyldELF : public RuntimeDyldImpl { protected: + ObjectImage *LoadedObject; + void resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -47,12 +49,18 @@ protected: int64_t Addend); virtual void processRelocationRef(const ObjRelocationInfo &Rel, - const ObjectFile &Obj, + ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, LocalSymbolMap &Symbols, StubMap &Stubs); + virtual ObjectImage *createObjectImage(const MemoryBuffer *InputBuffer); + virtual void handleObjectLoaded(ObjectImage *Obj); + public: - RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + RuntimeDyldELF(RTDyldMemoryManager *mm) + : RuntimeDyldImpl(mm), LoadedObject(0) {} + + virtual ~RuntimeDyldELF(); bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const; }; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index d6430a9..2dea13f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -29,6 +29,7 @@ #include "llvm/ADT/Triple.h" #include <map> #include "llvm/Support/Format.h" +#include "ObjectImage.h" using namespace llvm; using namespace llvm::object; @@ -110,6 +111,9 @@ protected: StringMap<SymbolLoc> SymbolTable; typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap; + // Keep a map of common symbols to their sizes + typedef std::map<SymbolRef, unsigned> CommonSymbolMap; + // For each symbol, keep a list of relocations based on it. Anytime // its address is reassigned (the JIT re-compiled the function, e.g.), // the relocations get re-resolved. @@ -149,18 +153,29 @@ protected: return (uint8_t*)Sections[SectionID].Address; } + /// \brief Emits a section containing common symbols. + /// \return SectionID. + unsigned emitCommonSymbols(ObjectImage &Obj, + const CommonSymbolMap &Map, + uint64_t TotalSize, + LocalSymbolMap &Symbols); + /// \brief Emits section data from the object file to the MemoryManager. /// \param IsCode if it's true then allocateCodeSection() will be /// used for emission, else allocateDataSection() will be used. /// \return SectionID. - unsigned emitSection(const SectionRef &Section, bool IsCode); + unsigned emitSection(ObjectImage &Obj, + const SectionRef &Section, + bool IsCode); /// \brief Find Section in LocalSections. If the section is not found, emit /// it and store it in LocalSections. /// \param IsCode if it's true then allocateCodeSection() will be /// used for emission, else allocateDataSection() will be used. /// \return SectionID. - unsigned findOrEmitSection(const SectionRef &Section, bool IsCode, + unsigned findOrEmitSection(ObjectImage &Obj, + const SectionRef &Section, + bool IsCode, ObjSectionToIDMap &LocalSections); /// \brief If Value.SymbolName is NULL then store relocation to the @@ -191,11 +206,18 @@ protected: /// \brief Parses the object file relocation and stores it in Relocations /// or SymbolRelocations. The behavior depends on the object file type. virtual void processRelocationRef(const ObjRelocationInfo &Rel, - const ObjectFile &Obj, + ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, LocalSymbolMap &Symbols, StubMap &Stubs) = 0; void resolveSymbols(); + virtual ObjectImage *createObjectImage(const MemoryBuffer *InputBuffer); + virtual void handleObjectLoaded(ObjectImage *Obj) + { + // Subclasses may choose to retain this image if they have a use for it. + delete Obj; + } + public: RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 24437e0..b7f515d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -42,8 +42,7 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, // This just dispatches to the proper target specific routine. switch (Arch) { default: llvm_unreachable("Unsupported CPU type!"); - case Triple::x86_64: // Fall through. - case Triple::x86: + case Triple::x86_64: resolveX86_64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value, @@ -52,6 +51,15 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, Size, Addend); break; + case Triple::x86: + resolveI386Relocation(LocalAddress, + FinalAddress, + (uintptr_t)Value, + isPCRel, + Type, + Size, + Addend); + break; case Triple::arm: // Fall through. case Triple::thumb: resolveARMRelocation(LocalAddress, @@ -66,6 +74,35 @@ void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress, } bool RuntimeDyldMachO:: +resolveI386Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend) { + if (isPCRel) + Value -= FinalAddress + 4; // see resolveX86_64Relocation + + switch (Type) { + default: + llvm_unreachable("Invalid relocation type!"); + case macho::RIT_Vanilla: { + uint8_t *p = LocalAddress; + uint64_t ValueToWrite = Value + Addend; + for (unsigned i = 0; i < Size; ++i) { + *p++ = (uint8_t)(ValueToWrite & 0xff); + ValueToWrite >>= 8; + } + return false; + } + case macho::RIT_Difference: + case macho::RIT_Generic_LocalDifference: + case macho::RIT_Generic_PreboundLazyPointer: + return Error("Relocation type not implemented yet!"); + } +} + +bool RuntimeDyldMachO:: resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -167,10 +204,11 @@ resolveARMRelocation(uint8_t *LocalAddress, return false; } -void RuntimeDyldMachO:: -processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, - ObjSectionToIDMap &ObjSectionToID, - LocalSymbolMap &Symbols, StubMap &Stubs) { +void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID, + LocalSymbolMap &Symbols, + StubMap &Stubs) { uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); RelocationValueRef Value; @@ -183,32 +221,32 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, const SymbolRef &Symbol = Rel.Symbol; Symbol.getName(TargetName); // First look the symbol in object file symbols. - LocalSymbolMap::iterator it = Symbols.find(TargetName.data()); - if (it != Symbols.end()) { - Value.SectionID = it->second.first; - Value.Addend = it->second.second; + LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data()); + if (lsi != Symbols.end()) { + Value.SectionID = lsi->second.first; + Value.Addend = lsi->second.second; } else { // Second look the symbol in global symbol table. - StringMap<SymbolLoc>::iterator itS = SymbolTable.find(TargetName.data()); - if (itS != SymbolTable.end()) { - Value.SectionID = itS->second.first; - Value.Addend = itS->second.second; + StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data()); + if (gsi != SymbolTable.end()) { + Value.SectionID = gsi->second.first; + Value.Addend = gsi->second.second; } else Value.SymbolName = TargetName.data(); } } else { error_code err; - uint8_t sIdx = static_cast<uint8_t>(RelType & 0xFF); - section_iterator sIt = Obj.begin_sections(), - sItEnd = Obj.end_sections(); - for (uint8_t i = 1; i < sIdx; i++) { + uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF); + section_iterator si = Obj.begin_sections(), + se = Obj.end_sections(); + for (uint8_t i = 1; i < sectionIndex; i++) { error_code err; - sIt.increment(err); - if (sIt == sItEnd) + si.increment(err); + if (si == se) break; } - assert(sIt != sItEnd && "No section containing relocation!"); - Value.SectionID = findOrEmitSection(*sIt, true, ObjSectionToID); + assert(si != se && "No section containing relocation!"); + Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID); Value.Addend = *(const intptr_t *)Target; if (Value.Addend) { // The MachO addend is an offset from the current section; we need to set it @@ -221,10 +259,10 @@ processRelocationRef(const ObjRelocationInfo &Rel, const ObjectFile &Obj, // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. - StubMap::const_iterator stubIt = Stubs.find(Value); - if (stubIt != Stubs.end()) + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) resolveRelocation(Target, (uint64_t)Target, - (uint64_t)Section.Address + stubIt->second, + (uint64_t)Section.Address + i->second, RelType, 0); else { // Create a new stub function.
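The RIT_Vanilla case in resolveI386Relocation above patches the image one byte at a time rather than through a wider pointer cast. A standalone sketch of that store, with a hypothetical helper name, showing why it works for any alignment of the patch address:

#include <cstdint>

// Sketch: store the low `Size` bytes of `Value` at `P`, least significant
// byte first. Byte stores avoid unaligned word writes and make the
// truncation to the relocation width (1, 2, or 4 bytes) explicit.
static void writeLittleEndian(uint8_t *P, uint64_t Value, unsigned Size) {
  for (unsigned i = 0; i < Size; ++i) {
    *P++ = (uint8_t)(Value & 0xff);
    Value >>= 8;
  }
}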
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 36b39dd..418d130 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -26,6 +26,13 @@ using namespace llvm::object; namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { protected: + bool resolveI386Relocation(uint8_t *LocalAddress, + uint64_t FinalAddress, + uint64_t Value, + bool isPCRel, + unsigned Type, + unsigned Size, + int64_t Addend); bool resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -42,7 +49,7 @@ protected: int64_t Addend); virtual void processRelocationRef(const ObjRelocationInfo &Rel, - const ObjectFile &Obj, + ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, LocalSymbolMap &Symbols, StubMap &Stubs); @@ -52,7 +59,7 @@ public: uint64_t Value, uint32_t Type, int64_t Addend); - + RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const; diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 3937fe5..42364f9 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Module.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Target/TargetMachine.h" @@ -24,17 +25,21 @@ using namespace llvm; +TargetMachine *EngineBuilder::selectTarget() { + StringRef MArch = ""; + StringRef MCPU = ""; + SmallVector<std::string, 1> MAttrs; + Triple TT(M->getTargetTriple()); + + return selectTarget(TT, MArch, MCPU, MAttrs); +} + /// selectTarget - Pick a target either via -march or by guessing the native /// arch. Add any CPU features specified via -mcpu or -mattr. TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, StringRef MArch, StringRef MCPU, - const SmallVectorImpl<std::string>& MAttrs, - const TargetOptions &Options, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, - std::string *ErrorStr) { + const SmallVectorImpl<std::string>& MAttrs) { Triple TheTriple(TargetTriple); if (TheTriple.getTriple().empty()) TheTriple.setTriple(sys::getDefaultTargetTriple()); @@ -84,7 +89,8 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, Options, - RM, CM, OL); + RelocModel, CMModel, + OptLevel); assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index f19e6e3..765fcc8 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -20,8 +20,9 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <cctype> @@ -50,8 +51,8 @@ class TypeMapTy : public ValueMapTypeRemapper { /// DstResolvedOpaqueTypes - This is the set of opaque types in the /// destination modules who are getting a body from the source module. 
SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes; + public: - /// addTypeMapping - Indicate that the specified type in the destination /// module is conceptually equivalent to the specified type in the source /// module. @@ -67,6 +68,18 @@ public: FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));} + /// dump - Dump out the type map for debugging purposes. + void dump() const { + for (DenseMap<Type*, Type*>::const_iterator + I = MappedTypes.begin(), E = MappedTypes.end(); I != E; ++I) { + dbgs() << "TypeMap: "; + I->first->dump(); + dbgs() << " => "; + I->second->dump(); + dbgs() << '\n'; + } + } + private: Type *getImpl(Type *T); /// remapType - Implement the ValueMapTypeRemapper interface. @@ -224,7 +237,6 @@ void TypeMapTy::linkDefinedTypeBodies() { DstResolvedOpaqueTypes.clear(); } - /// get - Return the mapped type to use for the specified input type from the /// source module. Type *TypeMapTy::get(Type *Ty) { @@ -328,8 +340,6 @@ Type *TypeMapTy::getImpl(Type *Ty) { return *Entry = DTy; } - - //===----------------------------------------------------------------------===// // ModuleLinker implementation. //===----------------------------------------------------------------------===// @@ -431,8 +441,6 @@ namespace { }; } - - /// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict /// in the symbol table. This is good for all clients except for us. Go /// through the trouble to force this back. @@ -454,9 +462,9 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) { } } -/// CopyGVAttributes - copy additional attributes (those not needed to construct +/// copyGVAttributes - copy additional attributes (those not needed to construct /// a GlobalValue) from the SrcGV to the DestGV. -static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) { +static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) { // Use the maximum alignment, rather than just copying the alignment of SrcGV. unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment()); DestGV->copyAttributesFrom(SrcGV); @@ -586,14 +594,16 @@ void ModuleLinker::computeTypeMapping() { // At this point, the destination module may have a type "%foo = { i32 }" for // example. When the source module got loaded into the same LLVMContext, if // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - // Though it isn't required for correctness, attempt to link these up to clean - // up the IR. std::vector<StructType*> SrcStructTypes; SrcM->findUsedStructTypes(SrcStructTypes); - SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - + + std::vector<StructType*> DstStructTypes; + DstM->findUsedStructTypes(DstStructTypes); + SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(), + DstStructTypes.end()); + for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) { StructType *ST = SrcStructTypes[i]; if (!ST->hasName()) continue; @@ -606,9 +616,24 @@ void ModuleLinker::computeTypeMapping() { // Check to see if the destination module has a struct with the prefix name. if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos))) - // Don't use it if this actually came from the source module. They're in - // the same LLVMContext after all. - if (!SrcStructTypesSet.count(DST)) + // Don't use it if this actually came from the source module. They're in + // the same LLVMContext after all. 
Also don't use it unless the type is + // actually used in the destination module. This can happen in situations + // like this: + // + // Module A Module B + // -------- -------- + // %Z = type { %A } %B = type { %C.1 } + // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* } + // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] } + // %C = type { i8* } %B.3 = type { %C.1 } + // + // When we link Module B with Module A, the '%B' in Module B is + // used. However, that would then use '%C.1'. But when we process '%C.1', + // we prefer to take the '%C' version. So we are then left with both + // '%C.1' and '%C' being used for the same types. This leads to some + // variables using one type and some using the other. + if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST)) TypeMap.addTypeMapping(DST, ST); } @@ -662,7 +687,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, DstGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. - CopyGVAttributes(NG, DstGV); + copyGVAttributes(NG, DstGV); AppendingVarInfo AVI; AVI.NewGV = NG; @@ -736,7 +761,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { SGV->isThreadLocal(), SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. - CopyGVAttributes(NewDGV, SGV); + copyGVAttributes(NewDGV, SGV); if (NewVisibility) NewDGV->setVisibility(*NewVisibility); @@ -784,7 +809,7 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { // bring SF over. Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(), SF->getName(), DstM); - CopyGVAttributes(NewDF, SF); + copyGVAttributes(NewDF, SF); if (NewVisibility) NewDF->setVisibility(*NewVisibility); @@ -839,7 +864,7 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()), SGA->getLinkage(), SGA->getName(), /*aliasee*/0, DstM); - CopyGVAttributes(NewDA, SGA); + copyGVAttributes(NewDA, SGA); if (NewVisibility) NewDA->setVisibility(*NewVisibility); @@ -872,9 +897,8 @@ void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements)); } - -// linkGlobalInits - Update the initializers in the Dest module now that all -// globals that may be referenced are in Dest. +/// linkGlobalInits - Update the initializers in the Dest module now that all +/// globals that may be referenced are in Dest. void ModuleLinker::linkGlobalInits() { // Loop over all of the globals in the src module, mapping them over as we go for (Module::const_global_iterator I = SrcM->global_begin(), @@ -891,9 +915,9 @@ void ModuleLinker::linkGlobalInits() { } } -// linkFunctionBody - Copy the source function over into the dest function and -// fix up references to values. At this point we know that Dest is an external -// function, and that Src is not. +/// linkFunctionBody - Copy the source function over into the dest function and +/// fix up references to values. At this point we know that Dest is an external +/// function, and that Src is not. void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration()); @@ -932,7 +956,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { } - +/// linkAliasBodies - Insert all of the aliases in Src into the Dest module. 
void ModuleLinker::linkAliasBodies() { for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end(); I != E; ++I) { @@ -945,7 +969,7 @@ void ModuleLinker::linkAliasBodies() { } } -/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest +/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest /// module. void ModuleLinker::linkNamedMDNodes() { const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); @@ -961,7 +985,8 @@ void ModuleLinker::linkNamedMDNodes() { } } -/// categorizeModuleFlagNodes - +/// categorizeModuleFlagNodes - Categorize the module flags according to their +/// type: Error, Warning, Override, and Require. bool ModuleLinker:: categorizeModuleFlagNodes(const NamedMDNode *ModFlags, DenseMap<MDString*, MDNode*> &ErrorNode, @@ -1220,6 +1245,7 @@ bool ModuleLinker::run() { } linkFunctionBody(cast<Function>(ValueMap[SF]), SF); + SF->Dematerialize(); } // Resolve all uses of aliases with aliasees. @@ -1259,7 +1285,8 @@ bool ModuleLinker::run() { // Link in function body. linkFunctionBody(DF, SF); - + SF->Dematerialize(); + // "Remove" from vector by setting the element to 0. *I = 0; @@ -1293,11 +1320,11 @@ bool ModuleLinker::run() { // LinkModules entrypoint. //===----------------------------------------------------------------------===// -// LinkModules - This function links two modules together, with the resulting -// left module modified to be the composite of the two input modules. If an -// error occurs, true is returned and ErrorMsg (if not null) is set to indicate -// the problem. Upon failure, the Dest module could be in a modified state, and -// shouldn't be relied on to be consistent. +/// LinkModules - This function links two modules together, with the resulting +/// left module modified to be the composite of the two input modules. If an +/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate +/// the problem. Upon failure, the Dest module could be in a modified state, +/// and shouldn't be relied on to be consistent. 
bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, std::string *ErrorMsg) { ModuleLinker TheLinker(Dest, Src, Mode); @@ -1305,6 +1332,6 @@ bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg; return true; } - + return false; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 36f94b4..9fc33b6 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -25,13 +24,13 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ELF.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/ADT/StringSwitch.h" #include <vector> using namespace llvm; @@ -84,32 +83,6 @@ class ELFObjectWriter : public MCObjectWriter { } }; - /// @name Relocation Data - /// @{ - - struct ELFRelocationEntry { - // Make these big enough for both 32-bit and 64-bit - uint64_t r_offset; - int Index; - unsigned Type; - const MCSymbol *Symbol; - uint64_t r_addend; - - ELFRelocationEntry() - : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {} - - ELFRelocationEntry(uint64_t RelocOffset, int Idx, - unsigned RelType, const MCSymbol *Sym, - uint64_t Addend) - : r_offset(RelocOffset), Index(Idx), Type(RelType), - Symbol(Sym), r_addend(Addend) {} - - // Support lexicographic sorting. - bool operator<(const ELFRelocationEntry &RE) const { - return RE.r_offset < r_offset; - } - }; - /// The target specific ELF writer instance. llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter; @@ -786,7 +759,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, else assert(isInt<32>(Addend)); - ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend); + ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend, Fixup); Relocations[Fragment->getParent()].push_back(ERE); } @@ -1072,8 +1045,10 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F, const MCSectionData *SD) { std::vector<ELFRelocationEntry> &Relocs = Relocations[SD]; - // sort by the r_offset just like gnu as does - array_pod_sort(Relocs.begin(), Relocs.end()); + + // Sort the relocation entries. Most targets just sort by r_offset, but some + // (e.g., MIPS) have additional constraints. 
+ TargetObjectWriter->sortRelocs(Asm, Relocs); for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { ELFRelocationEntry entry = Relocs[e - i - 1]; diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 51c3977..0b2e4ae 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCFixupKindInfo.h" using namespace llvm; MCAsmBackend::MCAsmBackend() diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 582d21f..8286c1d 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() { AllowQuotesInName = false; AllowNameToStartWithDigit = false; AllowPeriodsInName = true; + AllowUTF8 = true; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 767ac29..881d992 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoCOFF.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; void MCAsmInfoCOFF::anchor() { } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 5ffb6f2..bb67868 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -13,13 +13,13 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 572a5a5..35f675d 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -15,7 +15,9 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" @@ -57,6 +59,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName); assert(MAI && "Unable to create target asm info!"); + const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); + assert(MII && "Unable to create target instruction info!"); + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName); assert(MRI && "Unable to create target register info!"); @@ -80,13 +85,13 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, // Set up the instruction printer. 
int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, - *MAI, *MRI, *STI); + *MAI, *MII, *MRI, *STI); assert(IP && "Unable to create instruction printer!"); LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, - Ctx, DisAsm, IP); + STI, MII, Ctx, DisAsm, IP); assert(DC && "Allocation failure!"); return DC; diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h index 238ff7d..880a31a 100644 --- a/lib/MC/MCDisassembler/Disassembler.h +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -28,7 +28,9 @@ class MCContext; class MCAsmInfo; class MCDisassembler; class MCInstPrinter; +class MCInstrInfo; class MCRegisterInfo; +class MCSubtargetInfo; class Target; // @@ -61,6 +63,10 @@ private: llvm::OwningPtr<const llvm::MCAsmInfo> MAI; // The register information for the target architecture. llvm::OwningPtr<const llvm::MCRegisterInfo> MRI; + // The subtarget information for the target architecture. + llvm::OwningPtr<const llvm::MCSubtargetInfo> MSI; + // The instruction information for the target architecture. + llvm::OwningPtr<const llvm::MCInstrInfo> MII; // The assembly context for creating symbols and MCExprs. llvm::OwningPtr<const llvm::MCContext> Ctx; // The disassembler for the target architecture. @@ -78,6 +84,8 @@ public: LLVMSymbolLookupCallback symbolLookUp, const Target *theTarget, const MCAsmInfo *mAI, const MCRegisterInfo *mRI, + const MCSubtargetInfo *mSI, + const MCInstrInfo *mII, llvm::MCContext *ctx, const MCDisassembler *disAsm, MCInstPrinter *iP) : TripleName(tripleName), DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), @@ -85,6 +93,8 @@ public: CommentStream(CommentsToEmit) { MAI.reset(mAI); MRI.reset(mRI); + MSI.reset(mSI); + MII.reset(mII); Ctx.reset(ctx); DisAsm.reset(disAsm); IP.reset(iP); diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 4c2dae8..b2672ca 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -165,11 +166,16 @@ EDDisassembler::EDDisassembler(CPUKey &key) : return; InstInfos = Disassembler->getEDInfo(); - + + MII.reset(Tgt->createMCInstrInfo()); + + if (!MII) + return; + InstString.reset(new std::string); InstStream.reset(new raw_string_ostream(*InstString)); InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, - *MRI, *STI)); + *MII, *MRI, *STI)); if (!InstPrinter) return; diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index e97f11d..6f71908 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -36,8 +36,9 @@ class MCContext; class MCAsmInfo; class MCAsmLexer; class MCDisassembler; -class MCInstPrinter; class MCInst; +class MCInstPrinter; +class MCInstrInfo; class MCParsedAsmOperand; class MCRegisterInfo; class MCStreamer; @@ -137,6 +138,8 @@ struct EDDisassembler { llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo; /// The subtarget information for the target architecture llvm::OwningPtr<const llvm::MCSubtargetInfo> STI; + // The instruction information for the target architecture. 
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII; // The register information for the target architecture. llvm::OwningPtr<const llvm::MCRegisterInfo> MRI; /// The disassembler for the target architecture diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index e16f7ae..84a34f1 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -1361,12 +1361,10 @@ namespace llvm { return CIEKey::getTombstoneKey(); } static unsigned getHashValue(const CIEKey &Key) { - FoldingSetNodeID ID; - ID.AddPointer(Key.Personality); - ID.AddInteger(Key.PersonalityEncoding); - ID.AddInteger(Key.LsdaEncoding); - ID.AddBoolean(Key.IsSignalFrame); - return ID.ComputeHash(); + return static_cast<unsigned>(hash_combine(Key.Personality, + Key.PersonalityEncoding, + Key.LsdaEncoding, + Key.IsSignalFrame)); } static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) { diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp index 15bf476..171ab4d 100644 --- a/lib/MC/MCELFObjectTargetWriter.cpp +++ b/lib/MC/MCELFObjectTargetWriter.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" using namespace llvm; @@ -36,3 +37,10 @@ const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm, void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } + +void +MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + // Sort by the r_offset, just like gnu as does. + array_pod_sort(Relocs.begin(), Relocs.end()); +} diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index ceaecd0..7880155 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 3060ad6..847bcc0 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" @@ -20,7 +21,7 @@ MCInstPrinter::~MCInstPrinter() { /// getOpcodeName - Return the name of the specified opcode enum (e.g. /// "MOV32ri") or empty if we can't resolve it. StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { - return ""; + return MII.getName(Opcode); } void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 7dd06e7..b22ae33 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -260,9 +260,14 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { // Solaris requires different flags for .eh_frame to seemingly every other // platform. 
+ EHSectionType = ELF::SHT_PROGBITS; EHSectionFlags = ELF::SHF_ALLOC; - if (T.getOS() == Triple::Solaris) - EHSectionFlags |= ELF::SHF_WRITE; + if (T.getOS() == Triple::Solaris) { + if (T.getArch() == Triple::x86_64) + EHSectionType = ELF::SHT_X86_64_UNWIND; + else + EHSectionFlags |= ELF::SHF_WRITE; + } // ELF @@ -575,7 +580,7 @@ void MCObjectFileInfo::InitEHFrameSection() { SectionKind::getReadOnly()); else if (Env == IsELF) EHFrameSection = - Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS, + Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags, SectionKind::getDataRel()); else diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 906bdc3..bad7cfe 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectStreamer.h" - -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCAsmBackend.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index ab5ddaf..8aef43c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -14,7 +14,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -1528,11 +1527,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, } Lex(); } - // If there weren't any arguments, erase the token vector so everything - // else knows that. Leaving around the vestigial empty token list confuses - // things. - if (MacroArguments.size() == 1 && MacroArguments.back().empty()) - MacroArguments.clear(); + // If the last argument didn't end up with any tokens, it's not a real + // argument and we should remove it from the list. This happens with either + // a trailing comma or an empty argument list. + if (MacroArguments.back().empty()) + MacroArguments.pop_back(); // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions.
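The HandleMacroEntry hunk above replaces a special case (erase the vector when it holds exactly one empty argument) with a general rule (drop a trailing empty argument). A small illustration, using a plain vector of token lists as a simplified stand-in for the parser's MacroArguments; the helper name is hypothetical:

#include <string>
#include <vector>

typedef std::vector<std::string> TokenList;

// Sketch: after lexing, both `foo a, b,` (trailing comma) and a bare `foo`
// (no arguments) leave one empty token list at the end. Dropping just that
// entry keeps the two real arguments in the first case and yields an empty
// argument list in the second.
static void dropTrailingEmptyArgument(std::vector<TokenList> &MacroArguments) {
  if (!MacroArguments.empty() && MacroArguments.back().empty())
    MacroArguments.pop_back();
}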
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 57f90d9..8e4066c 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -8,13 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 7238c9e..67dc649 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -25,7 +25,6 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/ADT/StringMap.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index b8ba905..bd27a56 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -282,7 +282,6 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb, const coff_section *sec = 0; if (error_code ec = getSection(symb->SectionNumber, sec)) return ec; DataRefImpl Sec; - std::memset(&Sec, 0, sizeof(Sec)); Sec.p = reinterpret_cast<uintptr_t>(sec); Result = section_iterator(SectionRef(Sec, this)); } @@ -357,6 +356,27 @@ error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec, return object_error::success; } +error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, + bool &Result) const { + // FIXME: Unimplemented + Result = true; + return object_error::success; +} + +error_code COFFObjectFile::isSectionVirtual(DataRefImpl Sec, + bool &Result) const { + const coff_section *sec = toSec(Sec); + Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + return object_error::success; +} + +error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Sec, + bool &Result) const { + // FIXME: Unimplemented + Result = false; + return object_error::success; +} + error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const { @@ -374,7 +394,6 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { const coff_section *sec = toSec(Sec); DataRefImpl ret; - std::memset(&ret, 0, sizeof(ret)); if (sec->NumberOfRelocations == 0) ret.p = 0; else @@ -386,7 +405,6 @@ relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { const coff_section *sec = toSec(Sec); DataRefImpl ret; - std::memset(&ret, 0, sizeof(ret)); if (sec->NumberOfRelocations == 0) ret.p = 0; else @@ -470,7 +488,6 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; - std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -478,7 +495,6 @@ symbol_iterator COFFObjectFile::begin_symbols() const { symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. 
DataRefImpl ret; - std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(StringTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -511,14 +527,12 @@ StringRef COFFObjectFile::getLoadName() const { section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; - std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable); return section_iterator(SectionRef(ret, this)); } section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; - std::memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); return section_iterator(SectionRef(ret, this)); } @@ -669,7 +683,6 @@ error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { const coff_relocation* R = toRel(Rel); DataRefImpl Symb; - std::memset(&Symb, 0, sizeof(Symb)); Symb.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex); Res = SymbolRef(Symb, this); return object_error::success; @@ -749,7 +762,6 @@ error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel, const coff_symbol *symb = 0; if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec; DataRefImpl sym; - ::memset(&sym, 0, sizeof(sym)); sym.p = reinterpret_cast<uintptr_t>(symb); StringRef symname; if (error_code ec = getSymbolName(sym, symname)) return ec; diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index ab5f810..663b84e 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -17,16 +17,6 @@ namespace llvm { using namespace object; -namespace { - std::pair<unsigned char, unsigned char> - getElfArchType(MemoryBuffer *Object) { - if (Object->getBufferSize() < ELF::EI_NIDENT) - return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE); - return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS] - , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]); - } -} - // Creates an in-memory object-file by default: createELFObjectFile(Buffer) ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object); diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index 0041acd..69610f9 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Object parent = Libraries -required_libraries = Core Support +required_libraries = Support diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 819409e..3bcda17 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -34,7 +34,6 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, MachOObj(MOO), RegisteredStringTable(std::numeric_limits<uint32_t>::max()) { DataRefImpl DRI; - DRI.d.a = DRI.d.b = 0; moveToNextSection(DRI); uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; while (DRI.d.a < LoadCommandCount) { @@ -176,7 +175,12 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, BeginOffset = Entry->Value; SectionIndex = Entry->SectionIndex; if (!SectionIndex) { - Result = UnknownAddressOrSize; + uint32_t flags = SymbolRef::SF_None; + getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) + Result = Entry->Value; + else + Result = UnknownAddressOrSize; return object_error::success; } // Unfortunately symbols are unsorted so we need to touch all @@ -199,7 +203,12 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, 
BeginOffset = Entry->Value; SectionIndex = Entry->SectionIndex; if (!SectionIndex) { - Result = UnknownAddressOrSize; + uint32_t flags = SymbolRef::SF_None; + getSymbolFlags(DRI, flags); + if (flags & SymbolRef::SF_Common) + Result = Entry->Value; + else + Result = UnknownAddressOrSize; return object_error::success; } // Unfortunately symbols are unsorted so we need to touch all @@ -278,7 +287,7 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, MachOType = Entry->Type; } - // TODO: Correctly set SF_ThreadLocal and SF_Common. + // TODO: Correctly set SF_ThreadLocal Result = SymbolRef::SF_None; if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) @@ -287,8 +296,11 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, if (MachOFlags & macho::STF_StabsEntryMask) Result |= SymbolRef::SF_FormatSpecific; - if (MachOType & MachO::NlistMaskExternal) + if (MachOType & MachO::NlistMaskExternal) { Result |= SymbolRef::SF_Global; + if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) + Result |= SymbolRef::SF_Common; + } if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) Result |= SymbolRef::SF_Weak; @@ -355,7 +367,6 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, symbol_iterator MachOObjectFile::begin_symbols() const { // DRI.d.a = segment number; DRI.d.b = symbol index. DataRefImpl DRI; - DRI.d.a = DRI.d.b = 0; moveToNextSymbol(DRI); return symbol_iterator(SymbolRef(DRI, this)); } @@ -363,7 +374,6 @@ symbol_iterator MachOObjectFile::begin_symbols() const { symbol_iterator MachOObjectFile::end_symbols() const { DataRefImpl DRI; DRI.d.a = MachOObj->getHeader().NumLoadCommands; - DRI.d.b = 0; return symbol_iterator(SymbolRef(DRI, this)); } @@ -569,6 +579,37 @@ error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, return object_error::success; } +error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, + bool &Result) const { + // FIXME: Unimplemented + Result = true; + return object_error::success; +} + +error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, + bool &Result) const { + // FIXME: Unimplemented + Result = false; + return object_error::success; +} + +error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI, + bool &Result) const { + if (MachOObj->is64Bit()) { + InMemoryStruct<macho::Section64> Sect; + getSection64(DRI, Sect); + Result = (Sect->Flags & MachO::SectionTypeZeroFill || + Sect->Flags & MachO::SectionTypeZeroFillLarge); + } else { + InMemoryStruct<macho::Section> Sect; + getSection(DRI, Sect); + Result = (Sect->Flags & MachO::SectionTypeZeroFill || + Sect->Flags & MachO::SectionTypeZeroFillLarge); + } + + return object_error::success; +} + error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, bool &Result) const { @@ -601,7 +642,6 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { DataRefImpl ret; - ret.d.a = 0; ret.d.b = getSectionIndex(Sec); return relocation_iterator(RelocationRef(ret, this)); } @@ -624,7 +664,6 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { section_iterator MachOObjectFile::begin_sections() const { DataRefImpl DRI; - DRI.d.a = DRI.d.b = 0; moveToNextSection(DRI); return section_iterator(SectionRef(DRI, this)); } @@ -632,7 +671,6 @@ section_iterator MachOObjectFile::begin_sections() const { section_iterator MachOObjectFile::end_sections() const { DataRefImpl DRI; DRI.d.a = 
MachOObj->getHeader().NumLoadCommands; - DRI.d.b = 0; return section_iterator(SectionRef(DRI, this)); } @@ -708,7 +746,6 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, bool isExtern = (RE->Word1 >> 27) & 1; DataRefImpl Sym; - Sym.d.a = Sym.d.b = 0; moveToNextSymbol(Sym); if (isExtern) { for (unsigned i = 0; i < SymbolIdx; i++) { diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 0b69238..9103327 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -32,7 +32,7 @@ add_llvm_library(LLVMSupport IntrusiveRefCntPtr.cpp IsInf.cpp IsNAN.cpp - JSONParser.cpp + Locale.cpp LockFileManager.cpp ManagedStatic.cpp MemoryBuffer.cpp @@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport ToolOutputFile.cpp Triple.cpp Twine.cpp + YAMLParser.cpp raw_os_ostream.cpp raw_ostream.cpp regcomp.c diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index b317e49..5c59a3e 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -95,7 +95,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) { return "DW_TAG_GNU_template_parameter_pack"; case DW_TAG_GNU_formal_parameter_pack: return "DW_TAG_GNU_formal_parameter_pack"; - case DW_TAG_APPLE_Property: return "DW_TAG_APPLE_property"; + case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property"; } return 0; } diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index e029970..c6282c6 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -265,15 +265,15 @@ void FoldingSetImpl::GrowHashTable() { FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) { - - void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets); + unsigned IDHash = ID.ComputeHash(); + void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; InsertPos = 0; FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { - if (NodeEquals(NodeInBucket, ID, TempID)) + if (NodeEquals(NodeInBucket, ID, IDHash, TempID)) return NodeInBucket; TempID.clear(); diff --git a/lib/Support/JSONParser.cpp b/lib/Support/JSONParser.cpp deleted file mode 100644 index 5dfcf29..0000000 --- a/lib/Support/JSONParser.cpp +++ /dev/null @@ -1,302 +0,0 @@ -//===--- JSONParser.cpp - Simple JSON parser ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a JSON parser. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/JSONParser.h" - -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/MemoryBuffer.h" - -using namespace llvm; - -JSONParser::JSONParser(StringRef Input, SourceMgr *SM) - : SM(SM), Failed(false) { - InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON"); - SM->AddNewSourceBuffer(InputBuffer, SMLoc()); - End = InputBuffer->getBuffer().end(); - Position = InputBuffer->getBuffer().begin(); -} - -JSONValue *JSONParser::parseRoot() { - if (Position != InputBuffer->getBuffer().begin()) - report_fatal_error("Cannot reuse JSONParser."); - if (isWhitespace()) - nextNonWhitespace(); - if (errorIfAtEndOfFile("'[' or '{' at start of JSON text")) - return 0; - switch (*Position) { - case '[': - return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); - case '{': - return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); - default: - setExpectedError("'[' or '{' at start of JSON text", *Position); - return 0; - } -} - -bool JSONParser::validate() { - JSONValue *Root = parseRoot(); - if (Root == NULL) { - return false; - } - return skip(*Root); -} - -bool JSONParser::skip(const JSONAtom &Atom) { - switch(Atom.getKind()) { - case JSONAtom::JK_Array: - case JSONAtom::JK_Object: - return skipContainer(*cast<JSONContainer>(&Atom)); - case JSONAtom::JK_String: - return true; - case JSONAtom::JK_KeyValuePair: - return skip(*cast<JSONKeyValuePair>(&Atom)->Value); - } - llvm_unreachable("Impossible enum value."); -} - -// Sets the current error to: -// "expected <Expected>, but found <Found>". -void JSONParser::setExpectedError(StringRef Expected, StringRef Found) { - SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, - "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>()); - Failed = true; -} - -// Sets the current error to: -// "expected <Expected>, but found <Found>". -void JSONParser::setExpectedError(StringRef Expected, char Found) { - setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str()); -} - -// If there is no character available, returns true and sets the current error -// to: "expected <Expected>, but found EOF.". -bool JSONParser::errorIfAtEndOfFile(StringRef Expected) { - if (Position == End) { - setExpectedError(Expected, "EOF"); - return true; - } - return false; -} - -// Sets the current error if the current character is not C to: -// "expected 'C', but got <current character>". -bool JSONParser::errorIfNotAt(char C, StringRef Message) { - if (*Position != C) { - std::string Expected = - ("'" + StringRef(&C, 1) + "' " + Message).str(); - if (Position == End) - setExpectedError(Expected, "EOF"); - else - setExpectedError(Expected, *Position); - return true; - } - return false; -} - -// Forbidding inlining improves performance by roughly 20%. -// FIXME: Remove once llvm optimizes this to the faster version without hints. -LLVM_ATTRIBUTE_NOINLINE static bool -wasEscaped(StringRef::iterator First, StringRef::iterator Position); - -// Returns whether a character at 'Position' was escaped with a leading '\'. -// 'First' specifies the position of the first character in the string. -static bool wasEscaped(StringRef::iterator First, - StringRef::iterator Position) { - assert(Position - 1 >= First); - StringRef::iterator I = Position - 1; - // We calulate the number of consecutive '\'s before the current position - // by iterating backwards through our string. 
- while (I >= First && *I == '\\') --I; - // (Position - 1 - I) now contains the number of '\'s before the current - // position. If it is odd, the character at 'Positon' was escaped. - return (Position - 1 - I) % 2 == 1; -} - -// Parses a JSONString, assuming that the current position is on a quote. -JSONString *JSONParser::parseString() { - assert(Position != End); - assert(!isWhitespace()); - if (errorIfNotAt('"', "at start of string")) - return 0; - StringRef::iterator First = Position + 1; - - // Benchmarking shows that this loop is the hot path of the application with - // about 2/3rd of the runtime cycles. Since escaped quotes are not the common - // case, and multiple escaped backslashes before escaped quotes are very rare, - // we pessimize this case to achieve a smaller inner loop in the common case. - // We're doing that by having a quick inner loop that just scans for the next - // quote. Once we find the quote we check the last character to see whether - // the quote might have been escaped. If the last character is not a '\', we - // know the quote was not escaped and have thus found the end of the string. - // If the immediately preceding character was a '\', we have to scan backwards - // to see whether the previous character was actually an escaped backslash, or - // an escape character for the quote. If we find that the current quote was - // escaped, we continue parsing for the next quote and repeat. - // This optimization brings around 30% performance improvements. - do { - // Step over the current quote. - ++Position; - // Find the next quote. - while (Position != End && *Position != '"') - ++Position; - if (errorIfAtEndOfFile("'\"' at end of string")) - return 0; - // Repeat until the previous character was not a '\' or was an escaped - // backslash. - } while (*(Position - 1) == '\\' && wasEscaped(First, Position)); - - return new (ValueAllocator.Allocate<JSONString>()) - JSONString(StringRef(First, Position - First)); -} - - -// Advances the position to the next non-whitespace position. -void JSONParser::nextNonWhitespace() { - do { - ++Position; - } while (isWhitespace()); -} - -// Checks if there is a whitespace character at the current position. -bool JSONParser::isWhitespace() { - return *Position == ' ' || *Position == '\t' || - *Position == '\n' || *Position == '\r'; -} - -bool JSONParser::failed() const { - return Failed; -} - -// Parses a JSONValue, assuming that the current position is at the first -// character of the value. -JSONValue *JSONParser::parseValue() { - assert(Position != End); - assert(!isWhitespace()); - switch (*Position) { - case '[': - return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this); - case '{': - return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this); - case '"': - return parseString(); - default: - setExpectedError("'[', '{' or '\"' at start of value", *Position); - return 0; - } -} - -// Parses a JSONKeyValuePair, assuming that the current position is at the first -// character of the key, value pair. 
-JSONKeyValuePair *JSONParser::parseKeyValuePair() { - assert(Position != End); - assert(!isWhitespace()); - - JSONString *Key = parseString(); - if (Key == 0) - return 0; - - nextNonWhitespace(); - if (errorIfNotAt(':', "between key and value")) - return 0; - - nextNonWhitespace(); - const JSONValue *Value = parseValue(); - if (Value == 0) - return 0; - - return new (ValueAllocator.Allocate<JSONKeyValuePair>(1)) - JSONKeyValuePair(Key, Value); -} - -/// \brief Parses the first element of a JSON array or object, or closes the -/// array. -/// -/// The method assumes that the current position is before the first character -/// of the element, with possible white space in between. When successful, it -/// returns the new position after parsing the element. Otherwise, if there is -/// no next value, it returns a default constructed StringRef::iterator. -StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind, - char StartChar, char EndChar, - const JSONAtom *&Element) { - assert(*Position == StartChar); - Element = 0; - nextNonWhitespace(); - if (errorIfAtEndOfFile("value or end of container at start of container")) - return StringRef::iterator(); - - if (*Position == EndChar) - return StringRef::iterator(); - - Element = parseElement(ContainerKind); - if (Element == 0) - return StringRef::iterator(); - - return Position; -} - -/// \brief Parses the next element of a JSON array or object, or closes the -/// array. -/// -/// The method assumes that the current position is before the ',' which -/// separates the next element from the current element. When successful, it -/// returns the new position after parsing the element. Otherwise, if there is -/// no next value, it returns a default constructed StringRef::iterator. -StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind, - char EndChar, - const JSONAtom *&Element) { - Element = 0; - nextNonWhitespace(); - if (errorIfAtEndOfFile("',' or end of container for next element")) - return 0; - - if (*Position == ',') { - nextNonWhitespace(); - if (errorIfAtEndOfFile("element in container")) - return StringRef::iterator(); - - Element = parseElement(ContainerKind); - if (Element == 0) - return StringRef::iterator(); - - return Position; - } else if (*Position == EndChar) { - return StringRef::iterator(); - } else { - setExpectedError("',' or end of container for next element", *Position); - return StringRef::iterator(); - } -} - -const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) { - switch (ContainerKind) { - case JSONAtom::JK_Array: - return parseValue(); - case JSONAtom::JK_Object: - return parseKeyValuePair(); - default: - llvm_unreachable("Impossible code path"); - } -} - -bool JSONParser::skipContainer(const JSONContainer &Container) { - for (JSONContainer::AtomIterator I = Container.atom_current(), - E = Container.atom_end(); - I != E; ++I) { - assert(*I != 0); - if (!skip(**I)) - return false; - } - return !failed(); -} diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp new file mode 100644 index 0000000..17b9b6c --- /dev/null +++ b/lib/Support/Locale.cpp @@ -0,0 +1,10 @@ +#include "llvm/Support/Locale.h" +#include "llvm/Config/config.h" + +#ifdef __APPLE__ +#include "LocaleXlocale.inc" +#elif LLVM_ON_WIN32 +#include "LocaleWindows.inc" +#else +#include "LocaleGeneric.inc" +#endif diff --git a/lib/Support/LocaleGeneric.inc b/lib/Support/LocaleGeneric.inc new file mode 100644 index 0000000..278deee --- /dev/null +++ b/lib/Support/LocaleGeneric.inc @@ -0,0 +1,17 @@ 
+#include <cwctype> + +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef s) { + return s.size(); +} + +bool isPrint(int c) { + return iswprint(c); +} + +} +} +} diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc new file mode 100644 index 0000000..6827ac1 --- /dev/null +++ b/lib/Support/LocaleWindows.inc @@ -0,0 +1,15 @@ +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef s) { + return s.size(); +} + +bool isPrint(int c) { + return ' ' <= c && c <= '~'; +} + +} +} +}
\ No newline at end of file
diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc
new file mode 100644
index 0000000..f595e7c
--- /dev/null
+++ b/lib/Support/LocaleXlocale.inc
@@ -0,0 +1,61 @@
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ManagedStatic.h"
+#include <cassert>
+#include <xlocale.h>
+
+
+namespace {
+  struct locale_holder {
+    locale_holder()
+      : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE))
+    {
+      assert(NULL!=l);
+    }
+    ~locale_holder() {
+      freelocale(l);
+    }
+
+    int mbswidth(llvm::SmallString<16> s) const {
+      // this implementation assumes no '\0' in s
+      assert(s.size()==strlen(s.c_str()));
+
+      size_t size = mbstowcs_l(NULL,s.c_str(),0,l);
+      assert(size!=(size_t)-1);
+      if (size==0)
+        return 0;
+      llvm::SmallVector<wchar_t,200> ws(size);
+      size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l);
+      assert(ws.size()==size);
+      return wcswidth_l(&ws[0],ws.size(),l);
+    }
+
+    int isprint(int c) const {
+      return iswprint_l(c,l);
+    }
+
+  private:
+
+    locale_t l;
+  };
+
+  llvm::ManagedStatic<locale_holder> l;
+}
+
+namespace llvm {
+namespace sys {
+namespace locale {
+
+int columnWidth(StringRef s) {
+  int width = l->mbswidth(s);
+  assert(width>=0);
+  return width;
+}
+
+bool isPrint(int c) {
+  return l->isprint(c);
+}
+
+}
+}
+}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 911a03f..16e5c7a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -304,6 +304,16 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
                                                 RealMapOffset)) {
       result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
         StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
+
+      if (RequiresNullTerminator && result->getBufferEnd()[0] != '\0') {
+        // There could be a race that resulted in the file being larger than
+        // the FileSize passed by the caller. We already have an assertion
+        // for this in MemoryBuffer::init(), but we need a runtime guarantee
+        // here that the buffer is null-terminated, so make a copy that adds
+        // a null terminator.
+        result.reset(MemoryBuffer::getMemBufferCopy(result->getBuffer(),
+                                                    Filename));
+      }
       return error_code::success();
     }
   }
@@ -339,6 +349,7 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
     if (NumRead == 0) {
       assert(0 && "We got inaccurate FileSize value or fstat reported an "
                   "invalid file size.");
+      *BufPtr = '\0'; // null-terminate at the actual size.
break; } BytesLeft -= NumRead; diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 786e1a1..e2a69a6 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -654,12 +654,13 @@ error_code create_directories(const Twine &path, bool &existed) { StringRef p = path.toStringRef(path_storage); StringRef parent = path::parent_path(p); - bool parent_exists; + if (!parent.empty()) { + bool parent_exists; + if (error_code ec = fs::exists(parent, parent_exists)) return ec; - if (error_code ec = fs::exists(parent, parent_exists)) return ec; - - if (!parent_exists) - if (error_code ec = create_directories(parent, existed)) return ec; + if (!parent_exists) + if (error_code ec = create_directories(parent, existed)) return ec; + } return create_directory(p, existed); } diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 68d9c29..3b53e9f 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cstdlib> @@ -102,7 +103,7 @@ bool SmallPtrSetImpl::erase_imp(const void * Ptr) { } const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const { - unsigned Bucket = Hash(Ptr); + unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1); unsigned ArraySize = CurArraySize; unsigned ProbeAmt = 1; const void *const *Array = CurArray; diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index bbe36b2..15278c5 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -193,7 +193,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, } void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, - const Twine &Msg, ArrayRef<SMRange> Ranges) const { + const Twine &Msg, ArrayRef<SMRange> Ranges, + bool ShowColors) const { SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges); // Report the message with the diagnostic handler if present. @@ -208,7 +209,7 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, assert(CurBuf != -1 && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - Diagnostic.print(0, OS); + Diagnostic.print(0, OS, ShowColors); } //===----------------------------------------------------------------------===// @@ -225,7 +226,14 @@ SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, } -void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { +void SMDiagnostic::print(const char *ProgName, raw_ostream &S, + bool ShowColors) const { + // Display colors only if OS goes to a tty. 
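+  // (raw_ostream::is_displayed() reports whether the stream is attached to a
+  // console or terminal, so redirected or piped output stays uncolored.)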
+ ShowColors &= S.is_displayed(); + + if (ShowColors) + S.changeColor(raw_ostream::SAVEDCOLOR, true); + if (ProgName && ProgName[0]) S << ProgName << ": "; @@ -244,13 +252,33 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { } switch (Kind) { - case SourceMgr::DK_Error: S << "error: "; break; - case SourceMgr::DK_Warning: S << "warning: "; break; - case SourceMgr::DK_Note: S << "note: "; break; + case SourceMgr::DK_Error: + if (ShowColors) + S.changeColor(raw_ostream::RED, true); + S << "error: "; + break; + case SourceMgr::DK_Warning: + if (ShowColors) + S.changeColor(raw_ostream::MAGENTA, true); + S << "warning: "; + break; + case SourceMgr::DK_Note: + if (ShowColors) + S.changeColor(raw_ostream::BLACK, true); + S << "note: "; + break; } - + + if (ShowColors) { + S.resetColor(); + S.changeColor(raw_ostream::SAVEDCOLOR, true); + } + S << Message << '\n'; + if (ShowColors) + S.resetColor(); + if (LineNo == -1 || ColumnNo == -1) return; @@ -292,6 +320,9 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { } S << '\n'; + if (ShowColors) + S.changeColor(raw_ostream::GREEN, true); + // Print out the caret line, matching tabs in the source line. for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) { if (i >= LineContents.size() || LineContents[i] != '\t') { @@ -306,6 +337,9 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const { ++OutCol; } while (OutCol & 7); } + + if (ShowColors) + S.resetColor(); S << '\n'; } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index d261c53..44a1b38 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -88,6 +88,8 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case Apple: return "apple"; case PC: return "pc"; case SCEI: return "scei"; + case BGP: return "bgp"; + case BGQ: return "bgq"; } llvm_unreachable("Invalid VendorType!"); @@ -116,6 +118,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case Minix: return "minix"; case RTEMS: return "rtems"; case NativeClient: return "nacl"; + case CNK: return "cnk"; } llvm_unreachable("Invalid OSType"); @@ -258,6 +261,8 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("apple", Triple::Apple) .Case("pc", Triple::PC) .Case("scei", Triple::SCEI) + .Case("bgp", Triple::BGP) + .Case("bgq", Triple::BGQ) .Default(Triple::UnknownVendor); } @@ -282,6 +287,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("minix", Triple::Minix) .StartsWith("rtems", Triple::RTEMS) .StartsWith("nacl", Triple::NativeClient) + .StartsWith("cnk", Triple::CNK) .Default(Triple::UnknownOS); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 418dc07..ddc1e0f 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -60,6 +60,11 @@ #include <mach-o/dyld.h> #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(MAXPATHLEN) +# define MAXPATHLEN 4096 +#endif + // Put in a hack for Cygwin which falsely reports that the mkdtemp function // is available when it is not. 
#ifdef __CYGWIN__ @@ -256,7 +261,7 @@ Path::GetCurrentDirectory() { } #if defined(__FreeBSD__) || defined (__NetBSD__) || \ - defined(__OpenBSD__) || defined(__minix) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -308,7 +313,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) free(pv); return (NULL); } -#endif // __FreeBSD__ || __NetBSD__ +#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__ /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. @@ -325,7 +330,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { return Path(link_path); } #elif defined(__FreeBSD__) || defined (__NetBSD__) || \ - defined(__OpenBSD__) || defined(__minix) + defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 7d79947..7d259a3 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -46,6 +46,11 @@ #include <limits.h> #endif +// For GNU Hurd +#if defined(__GNU__) && !defined(PATH_MAX) +# define PATH_MAX 4096 +#endif + extern "C" int truncate (const char*, off_t); using namespace llvm; @@ -98,7 +103,12 @@ namespace sys { namespace fs { error_code current_path(SmallVectorImpl<char> &result) { +#ifdef MAXPATHLEN result.reserve(MAXPATHLEN); +#else +// For GNU Hurd + result.reserve(1024); +#endif while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 5cdb11c..f640462 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -136,7 +136,7 @@ int Process::GetCurrentGroupId() { return getgid(); } -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) #include <mach/mach.h> #endif @@ -150,7 +150,7 @@ void Process::PreventCoreFiles() { setrlimit(RLIMIT_CORE, &rlim); #endif -#ifdef HAVE_MACH_MACH_H +#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) // Disable crash reporting on Mac OS X 10.0-10.4 // get information about the original set of exception ports for the task @@ -290,6 +290,10 @@ const char *Process::OutputBold(bool bg) { return "\033[1m"; } +const char *Process::OutputReverse() { + return "\033[7m"; +} + const char *Process::ResetColor() { return "\033[0m"; } diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index c3885e1..399f686 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -275,9 +275,9 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { kern_return_t ret = task_set_exception_ports(self, mask, - NULL, + MACH_PORT_NULL, EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, - NULL); + THREAD_STATE_NONE); (void)ret; } #endif diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index 913b073..9a388b4 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -215,6 +215,38 @@ const char *Process::OutputColor(char code, bool bold, bool bg) { return 0; } +static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) { + CONSOLE_SCREEN_BUFFER_INFO info; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &info); + return info.wAttributes; +} + +const char *Process::OutputReverse() { + const WORD attributes + = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE)); + + const 
WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN | + FOREGROUND_RED | FOREGROUND_INTENSITY; + const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN | + BACKGROUND_RED | BACKGROUND_INTENSITY; + const WORD color_mask = foreground_mask | background_mask; + + WORD new_attributes = + ((attributes & FOREGROUND_BLUE )?BACKGROUND_BLUE :0) | + ((attributes & FOREGROUND_GREEN )?BACKGROUND_GREEN :0) | + ((attributes & FOREGROUND_RED )?BACKGROUND_RED :0) | + ((attributes & FOREGROUND_INTENSITY)?BACKGROUND_INTENSITY:0) | + ((attributes & BACKGROUND_BLUE )?FOREGROUND_BLUE :0) | + ((attributes & BACKGROUND_GREEN )?FOREGROUND_GREEN :0) | + ((attributes & BACKGROUND_RED )?FOREGROUND_RED :0) | + ((attributes & BACKGROUND_INTENSITY)?FOREGROUND_INTENSITY:0) | + 0; + new_attributes = (attributes & ~color_mask) | (new_attributes & color_mask); + + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), new_attributes); + return 0; +} + const char *Process::ResetColor() { SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); return 0; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp new file mode 100644 index 0000000..330519f --- /dev/null +++ b/lib/Support/YAMLParser.cpp @@ -0,0 +1,2117 @@ +//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a YAML parser. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLParser.h" + +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; +using namespace yaml; + +enum UnicodeEncodingForm { + UEF_UTF32_LE, //< UTF-32 Little Endian + UEF_UTF32_BE, //< UTF-32 Big Endian + UEF_UTF16_LE, //< UTF-16 Little Endian + UEF_UTF16_BE, //< UTF-16 Big Endian + UEF_UTF8, //< UTF-8 or ascii. + UEF_Unknown //< Not a valid Unicode encoding. +}; + +/// EncodingInfo - Holds the encoding type and length of the byte order mark if +/// it exists. Length is in {0, 2, 3, 4}. +typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo; + +/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode +/// encoding form of \a Input. +/// +/// @param Input A string of length 0 or more. +/// @returns An EncodingInfo indicating the Unicode encoding form of the input +/// and how long the byte order mark is if one exists. 
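+/// For example, input beginning with the bytes EF BB BF is reported as
+/// (UEF_UTF8, 3), while plain ASCII input is reported as (UEF_UTF8, 0).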
+static EncodingInfo getUnicodeEncoding(StringRef Input) { + if (Input.size() == 0) + return std::make_pair(UEF_Unknown, 0); + + switch (uint8_t(Input[0])) { + case 0x00: + if (Input.size() >= 4) { + if ( Input[1] == 0 + && uint8_t(Input[2]) == 0xFE + && uint8_t(Input[3]) == 0xFF) + return std::make_pair(UEF_UTF32_BE, 4); + if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) + return std::make_pair(UEF_UTF32_BE, 0); + } + + if (Input.size() >= 2 && Input[1] != 0) + return std::make_pair(UEF_UTF16_BE, 0); + return std::make_pair(UEF_Unknown, 0); + case 0xFF: + if ( Input.size() >= 4 + && uint8_t(Input[1]) == 0xFE + && Input[2] == 0 + && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 4); + + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) + return std::make_pair(UEF_UTF16_LE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xFE: + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) + return std::make_pair(UEF_UTF16_BE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xEF: + if ( Input.size() >= 3 + && uint8_t(Input[1]) == 0xBB + && uint8_t(Input[2]) == 0xBF) + return std::make_pair(UEF_UTF8, 3); + return std::make_pair(UEF_Unknown, 0); + } + + // It could still be utf-32 or utf-16. + if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 0); + + if (Input.size() >= 2 && Input[1] == 0) + return std::make_pair(UEF_UTF16_LE, 0); + + return std::make_pair(UEF_UTF8, 0); +} + +namespace llvm { +namespace yaml { +/// Token - A single YAML token. +struct Token : ilist_node<Token> { + enum TokenKind { + TK_Error, // Uninitialized token. + TK_StreamStart, + TK_StreamEnd, + TK_VersionDirective, + TK_TagDirective, + TK_DocumentStart, + TK_DocumentEnd, + TK_BlockEntry, + TK_BlockEnd, + TK_BlockSequenceStart, + TK_BlockMappingStart, + TK_FlowEntry, + TK_FlowSequenceStart, + TK_FlowSequenceEnd, + TK_FlowMappingStart, + TK_FlowMappingEnd, + TK_Key, + TK_Value, + TK_Scalar, + TK_Alias, + TK_Anchor, + TK_Tag + } Kind; + + /// A string of length 0 or more whose begin() points to the logical location + /// of the token in the input. + StringRef Range; + + Token() : Kind(TK_Error) {} +}; +} +} + +namespace llvm { +template<> +struct ilist_sentinel_traits<Token> { + Token *createSentinel() const { + return &Sentinel; + } + static void destroySentinel(Token*) {} + + Token *provideInitialHead() const { return createSentinel(); } + Token *ensureHead(Token*) const { return createSentinel(); } + static void noteHead(Token*, Token*) {} + +private: + mutable Token Sentinel; +}; + +template<> +struct ilist_node_traits<Token> { + Token *createNode(const Token &V) { + return new (Alloc.Allocate<Token>()) Token(V); + } + static void deleteNode(Token *V) {} + + void addNodeToList(Token *) {} + void removeNodeFromList(Token *) {} + void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, + ilist_iterator<Token> /*first*/, + ilist_iterator<Token> /*last*/) {} + + BumpPtrAllocator Alloc; +}; +} + +typedef ilist<Token> TokenQueueT; + +namespace { +/// @brief This struct is used to track simple keys. +/// +/// Simple keys are handled by creating an entry in SimpleKeys for each Token +/// which could legally be the start of a simple key. When peekNext is called, +/// if the Token To be returned is referenced by a SimpleKey, we continue +/// tokenizing until that potential simple key has either been found to not be +/// a simple key (we moved on to the next line or went further than 1024 chars). 
+/// Or when we run into a Value, and then insert a Key token (and possibly +/// others) before the SimpleKey's Tok. +struct SimpleKey { + TokenQueueT::iterator Tok; + unsigned Column; + unsigned Line; + unsigned FlowLevel; + bool IsRequired; + + bool operator ==(const SimpleKey &Other) { + return Tok == Other.Tok; + } +}; +} + +/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit +/// subsequence and the subsequence's length in code units (uint8_t). +/// A length of 0 represents an error. +typedef std::pair<uint32_t, unsigned> UTF8Decoded; + +static UTF8Decoded decodeUTF8(StringRef Range) { + StringRef::iterator Position= Range.begin(); + StringRef::iterator End = Range.end(); + // 1 byte: [0x00, 0x7f] + // Bit pattern: 0xxxxxxx + if ((*Position & 0x80) == 0) { + return std::make_pair(*Position, 1); + } + // 2 bytes: [0x80, 0x7ff] + // Bit pattern: 110xxxxx 10xxxxxx + if (Position + 1 != End && + ((*Position & 0xE0) == 0xC0) && + ((*(Position + 1) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x1F) << 6) | + (*(Position + 1) & 0x3F); + if (codepoint >= 0x80) + return std::make_pair(codepoint, 2); + } + // 3 bytes: [0x8000, 0xffff] + // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx + if (Position + 2 != End && + ((*Position & 0xF0) == 0xE0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x0F) << 12) | + ((*(Position + 1) & 0x3F) << 6) | + (*(Position + 2) & 0x3F); + // Codepoints between 0xD800 and 0xDFFF are invalid, as + // they are high / low surrogate halves used by UTF-16. + if (codepoint >= 0x800 && + (codepoint < 0xD800 || codepoint > 0xDFFF)) + return std::make_pair(codepoint, 3); + } + // 4 bytes: [0x10000, 0x10FFFF] + // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + if (Position + 3 != End && + ((*Position & 0xF8) == 0xF0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80) && + ((*(Position + 3) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x07) << 18) | + ((*(Position + 1) & 0x3F) << 12) | + ((*(Position + 2) & 0x3F) << 6) | + (*(Position + 3) & 0x3F); + if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) + return std::make_pair(codepoint, 4); + } + return std::make_pair(0, 0); +} + +namespace llvm { +namespace yaml { +/// @brief Scans YAML tokens from a MemoryBuffer. +class Scanner { +public: + Scanner(const StringRef Input, SourceMgr &SM); + + /// @brief Parse the next token and return it without popping it. + Token &peekNext(); + + /// @brief Parse the next token and pop it from the queue. + Token getNext(); + + void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + SM.PrintMessage(Loc, Kind, Message, Ranges); + } + + void setError(const Twine &Message, StringRef::iterator Position) { + if (Current >= End) + Current = End - 1; + + // Don't print out more errors after the first one we encounter. The rest + // are just the result of the first, and have no meaning. + if (!Failed) + printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); + Failed = true; + } + + void setError(const Twine &Message) { + setError(Message, Current); + } + + /// @brief Returns true if an error occurred while parsing. + bool failed() { + return Failed; + } + +private: + StringRef currentInput() { + return StringRef(Current, End - Current); + } + + /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting + /// at \a Position. 
+  ///
+  /// If the UTF-8 code units starting at Position do not form a well-formed
+  /// code unit subsequence, then the Unicode scalar value is 0, and the length
+  /// is 0.
+  UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+    return ::decodeUTF8(StringRef(Position, End - Position));
+  }
+
+  // The following functions are based on the grammar rules in the YAML spec.
+  // The style of the function names is meant to closely match how they are
+  // written in the spec. The number within the [] is the number of the
+  // grammar rule in the spec.
+  //
+  // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+  //
+  // c-
+  //   A production starting and ending with a special character.
+  // b-
+  //   A production matching a single line break.
+  // nb-
+  //   A production starting and ending with a non-break character.
+  // s-
+  //   A production starting and ending with a white space character.
+  // ns-
+  //   A production starting and ending with a non-space character.
+  // l-
+  //   A production matching complete line(s).
+
+  /// @brief Skip a single nb-char[27] starting at Position.
+  ///
+  /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+  ///            | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+  ///
+  /// @returns The code unit after the nb-char, or Position if it's not an
+  ///          nb-char.
+  StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+  /// @brief Skip a single b-break[28] starting at Position.
+  ///
+  /// A b-break is 0xD 0xA | 0xD | 0xA
+  ///
+  /// @returns The code unit after the b-break, or Position if it's not a
+  ///          b-break.
+  StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+  /// @brief Skip a single s-white[33] starting at Position.
+  ///
+  /// A s-white is 0x20 | 0x9
+  ///
+  /// @returns The code unit after the s-white, or Position if it's not a
+  ///          s-white.
+  StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+  /// @brief Skip a single ns-char[34] starting at Position.
+  ///
+  /// A ns-char is nb-char - s-white
+  ///
+  /// @returns The code unit after the ns-char, or Position if it's not a
+  ///          ns-char.
+  StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+  typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+  /// @brief Skip minimal well-formed code unit subsequences until Func
+  ///        returns its input.
+  ///
+  /// @returns The code unit after the last minimal well-formed code unit
+  ///          subsequence that Func accepted.
+  StringRef::iterator skip_while( SkipWhileFunc Func
+                                , StringRef::iterator Position);
+
+  /// @brief Scan ns-uri-char[39]s starting at Cur.
+  ///
+  /// This updates Cur and Column while scanning.
+  ///
+  /// @returns A StringRef starting at Cur which covers the longest contiguous
+  ///          sequence of ns-uri-char.
+  StringRef scan_ns_uri_char();
+
+  /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
+  StringRef scan_ns_plain_one_line();
+
+  /// @brief Consume a minimal well-formed code unit subsequence starting at
+  ///        \a Cur. Return false if it is not the same Unicode scalar value as
+  ///        \a Expected. This updates \a Column.
+  bool consume(uint32_t Expected);
+
+  /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
+  void skip(uint32_t Distance);
+
+  /// @brief Return true if the minimal well-formed code unit subsequence at
+  ///        Pos is whitespace or a new line.
+  bool isBlankOrBreak(StringRef::iterator Position);
+
+  /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+ void saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired); + + /// @brief Remove simple keys that can no longer be valid simple keys. + /// + /// Invalid simple keys are not on the current line or are further than 1024 + /// columns back. + void removeStaleSimpleKeyCandidates(); + + /// @brief Remove all simple keys on FlowLevel \a Level. + void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); + + /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd + /// tokens if needed. + bool unrollIndent(int ToColumn); + + /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint + /// if needed. + bool rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint); + + /// @brief Skip whitespace and comments until the start of the next token. + void scanToNextToken(); + + /// @brief Must be the first token generated. + bool scanStreamStart(); + + /// @brief Generate tokens needed to close out the stream. + bool scanStreamEnd(); + + /// @brief Scan a %BLAH directive. + bool scanDirective(); + + /// @brief Scan a ... or ---. + bool scanDocumentIndicator(bool IsStart); + + /// @brief Scan a [ or { and generate the proper flow collection start token. + bool scanFlowCollectionStart(bool IsSequence); + + /// @brief Scan a ] or } and generate the proper flow collection end token. + bool scanFlowCollectionEnd(bool IsSequence); + + /// @brief Scan the , that separates entries in a flow collection. + bool scanFlowEntry(); + + /// @brief Scan the - that starts block sequence entries. + bool scanBlockEntry(); + + /// @brief Scan an explicit ? indicating a key. + bool scanKey(); + + /// @brief Scan an explicit : indicating a value. + bool scanValue(); + + /// @brief Scan a quoted scalar. + bool scanFlowScalar(bool IsDoubleQuoted); + + /// @brief Scan an unquoted scalar. + bool scanPlainScalar(); + + /// @brief Scan an Alias or Anchor starting with * or &. + bool scanAliasOrAnchor(bool IsAlias); + + /// @brief Scan a block scalar starting with | or >. + bool scanBlockScalar(bool IsLiteral); + + /// @brief Scan a tag of the form !stuff. + bool scanTag(); + + /// @brief Dispatch to the next scanning function based on \a *Cur. + bool fetchMoreTokens(); + + /// @brief The SourceMgr used for diagnostics and buffer management. + SourceMgr &SM; + + /// @brief The original input. + MemoryBuffer *InputBuffer; + + /// @brief The current position of the scanner. + StringRef::iterator Current; + + /// @brief The end of the input (one past the last character). + StringRef::iterator End; + + /// @brief Current YAML indentation level in spaces. + int Indent; + + /// @brief Current column number in Unicode code points. + unsigned Column; + + /// @brief Current line number. + unsigned Line; + + /// @brief How deep we are in flow style containers. 0 Means at block level. + unsigned FlowLevel; + + /// @brief Are we at the start of the stream? + bool IsStartOfStream; + + /// @brief Can the next token be the start of a simple key? + bool IsSimpleKeyAllowed; + + /// @brief Is the next token required to start a simple key? + bool IsSimpleKeyRequired; + + /// @brief True if an error has occurred. + bool Failed; + + /// @brief Queue of tokens. This is required to queue up tokens while looking + /// for the end of a simple key. And for cases where a single character + /// can produce multiple tokens (e.g. BlockEnd). + TokenQueueT TokenQueue; + + /// @brief Indentation levels. 
+ SmallVector<int, 4> Indents; + + /// @brief Potential simple keys. + SmallVector<SimpleKey, 4> SimpleKeys; +}; + +} // end namespace yaml +} // end namespace llvm + +/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. +static void encodeUTF8( uint32_t UnicodeScalarValue + , SmallVectorImpl<char> &Result) { + if (UnicodeScalarValue <= 0x7F) { + Result.push_back(UnicodeScalarValue & 0x7F); + } else if (UnicodeScalarValue <= 0x7FF) { + uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + } else if (UnicodeScalarValue <= 0xFFFF) { + uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + } else if (UnicodeScalarValue <= 0x10FFFF) { + uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + Result.push_back(FourthByte); + } +} + +bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); + switch (T.Kind) { + case Token::TK_StreamStart: + OS << "Stream-Start: "; + break; + case Token::TK_StreamEnd: + OS << "Stream-End: "; + break; + case Token::TK_VersionDirective: + OS << "Version-Directive: "; + break; + case Token::TK_TagDirective: + OS << "Tag-Directive: "; + break; + case Token::TK_DocumentStart: + OS << "Document-Start: "; + break; + case Token::TK_DocumentEnd: + OS << "Document-End: "; + break; + case Token::TK_BlockEntry: + OS << "Block-Entry: "; + break; + case Token::TK_BlockEnd: + OS << "Block-End: "; + break; + case Token::TK_BlockSequenceStart: + OS << "Block-Sequence-Start: "; + break; + case Token::TK_BlockMappingStart: + OS << "Block-Mapping-Start: "; + break; + case Token::TK_FlowEntry: + OS << "Flow-Entry: "; + break; + case Token::TK_FlowSequenceStart: + OS << "Flow-Sequence-Start: "; + break; + case Token::TK_FlowSequenceEnd: + OS << "Flow-Sequence-End: "; + break; + case Token::TK_FlowMappingStart: + OS << "Flow-Mapping-Start: "; + break; + case Token::TK_FlowMappingEnd: + OS << "Flow-Mapping-End: "; + break; + case Token::TK_Key: + OS << "Key: "; + break; + case Token::TK_Value: + OS << "Value: "; + break; + case Token::TK_Scalar: + OS << "Scalar: "; + break; + case Token::TK_Alias: + OS << "Alias: "; + break; + case Token::TK_Anchor: + OS << "Anchor: "; + break; + case Token::TK_Tag: + OS << "Tag: "; + break; + case Token::TK_Error: + break; + } + OS << T.Range << "\n"; + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +bool yaml::scanTokens(StringRef Input) { + llvm::SourceMgr SM; + llvm::yaml::Scanner scanner(Input, SM); + for (;;) { + llvm::yaml::Token T = scanner.getNext(); + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +std::string yaml::escape(StringRef Input) { + std::string EscapedInput; + for (StringRef::iterator i = Input.begin(), e = 
Input.end(); i != e; ++i) { + if (*i == '\\') + EscapedInput += "\\\\"; + else if (*i == '"') + EscapedInput += "\\\""; + else if (*i == 0) + EscapedInput += "\\0"; + else if (*i == 0x07) + EscapedInput += "\\a"; + else if (*i == 0x08) + EscapedInput += "\\b"; + else if (*i == 0x09) + EscapedInput += "\\t"; + else if (*i == 0x0A) + EscapedInput += "\\n"; + else if (*i == 0x0B) + EscapedInput += "\\v"; + else if (*i == 0x0C) + EscapedInput += "\\f"; + else if (*i == 0x0D) + EscapedInput += "\\r"; + else if (*i == 0x1B) + EscapedInput += "\\e"; + else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. + std::string HexStr = utohexstr(*i); + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. + UTF8Decoded UnicodeScalarValue + = decodeUTF8(StringRef(i, Input.end() - i)); + if (UnicodeScalarValue.second == 0) { + // Found invalid char. + SmallString<4> Val; + encodeUTF8(0xFFFD, Val); + EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); + // FIXME: Error reporting. + return EscapedInput; + } + if (UnicodeScalarValue.first == 0x85) + EscapedInput += "\\N"; + else if (UnicodeScalarValue.first == 0xA0) + EscapedInput += "\\_"; + else if (UnicodeScalarValue.first == 0x2028) + EscapedInput += "\\L"; + else if (UnicodeScalarValue.first == 0x2029) + EscapedInput += "\\P"; + else { + std::string HexStr = utohexstr(UnicodeScalarValue.first); + if (HexStr.size() <= 2) + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 4) + EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 8) + EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; + } + i += UnicodeScalarValue.second - 1; + } else + EscapedInput.push_back(*i); + } + return EscapedInput; +} + +Scanner::Scanner(StringRef Input, SourceMgr &sm) + : SM(sm) + , Indent(-1) + , Column(0) + , Line(0) + , FlowLevel(0) + , IsStartOfStream(true) + , IsSimpleKeyAllowed(true) + , IsSimpleKeyRequired(false) + , Failed(false) { + InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); + SM.AddNewSourceBuffer(InputBuffer, SMLoc()); + Current = InputBuffer->getBufferStart(); + End = InputBuffer->getBufferEnd(); +} + +Token &Scanner::peekNext() { + // If the current token is a possible simple key, keep parsing until we + // can confirm. + bool NeedMore = false; + while (true) { + if (TokenQueue.empty() || NeedMore) { + if (!fetchMoreTokens()) { + TokenQueue.clear(); + TokenQueue.push_back(Token()); + return TokenQueue.front(); + } + } + assert(!TokenQueue.empty() && + "fetchMoreTokens lied about getting tokens!"); + + removeStaleSimpleKeyCandidates(); + SimpleKey SK; + SK.Tok = TokenQueue.front(); + if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) + == SimpleKeys.end()) + break; + else + NeedMore = true; + } + return TokenQueue.front(); +} + +Token Scanner::getNext() { + Token Ret = peekNext(); + // TokenQueue can be empty if there was an error getting the next token. + if (!TokenQueue.empty()) + TokenQueue.pop_front(); + + // There cannot be any referenced Token's if the TokenQueue is empty. So do a + // quick deallocation of them all. + if (TokenQueue.empty()) { + TokenQueue.Alloc.Reset(); + } + + return Ret; +} + +StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + // Check 7 bit c-printable - b-char. 
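+  // (That is, TAB or a printable ASCII byte in [0x20, 0x7E]; line breaks
+  // are handled separately by skip_b_break.)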
+  if (   *Position == 0x09
+      || (*Position >= 0x20 && *Position <= 0x7E))
+    return Position + 1;
+
+  // Check for valid UTF-8.
+  if (uint8_t(*Position) & 0x80) {
+    UTF8Decoded u8d = decodeUTF8(Position);
+    if (   u8d.second != 0
+        && u8d.first != 0xFEFF
+        && (   u8d.first == 0x85
+            || (   u8d.first >= 0xA0
+                && u8d.first <= 0xD7FF)
+            || (   u8d.first >= 0xE000
+                && u8d.first <= 0xFFFD)
+            || (   u8d.first >= 0x10000
+                && u8d.first <= 0x10FFFF)))
+      return Position + u8d.second;
+  }
+  return Position;
+}
+
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+  if (*Position == 0x0D) {
+    if (Position + 1 != End && *(Position + 1) == 0x0A)
+      return Position + 2;
+    return Position + 1;
+  }
+
+  if (*Position == 0x0A)
+    return Position + 1;
+  return Position;
+}
+
+
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+  if (Position == End)
+    return Position;
+  if (*Position == ' ' || *Position == '\t')
+    return Position + 1;
+  return Position;
+}
+
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+  if (Position == End)
+    return Position;
+  if (*Position == ' ' || *Position == '\t')
+    return Position;
+  return skip_nb_char(Position);
+}
+
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+                                       , StringRef::iterator Position) {
+  while (true) {
+    StringRef::iterator i = (this->*Func)(Position);
+    if (i == Position)
+      break;
+    Position = i;
+  }
+  return Position;
+}
+
+// ns-hex-digit is a hexadecimal digit only: [0-9a-fA-F].
+static bool is_ns_hex_digit(const char C) {
+  return    (C >= '0' && C <= '9')
+         || (C >= 'a' && C <= 'f')
+         || (C >= 'A' && C <= 'F');
+}
+
+static bool is_ns_word_char(const char C) {
+  return    C == '-'
+         || (C >= 'a' && C <= 'z')
+         || (C >= 'A' && C <= 'Z');
+}
+
+StringRef Scanner::scan_ns_uri_char() {
+  StringRef::iterator Start = Current;
+  while (true) {
+    if (Current == End)
+      break;
+    if ((   *Current == '%'
+         && Current + 2 < End
+         && is_ns_hex_digit(*(Current + 1))
+         && is_ns_hex_digit(*(Current + 2)))
+        || is_ns_word_char(*Current)
+        || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+           != StringRef::npos) {
+      ++Current;
+      ++Column;
+    } else
+      break;
+  }
+  return StringRef(Start, Current - Start);
+}
+
+StringRef Scanner::scan_ns_plain_one_line() {
+  StringRef::iterator start = Current;
+  // The first character must already be verified.
+  ++Current;
+  while (true) {
+    if (Current == End) {
+      break;
+    } else if (*Current == ':') {
+      // Check if the next character is a ns-char.
+      if (Current + 1 == End)
+        break;
+      StringRef::iterator i = skip_ns_char(Current + 1);
+      if (Current + 1 != i) {
+        Current = i;
+        Column += 2; // Consume both the ':' and ns-char.
+      } else
+        break;
+    } else if (*Current == '#') {
+      // Check if the previous character was a ns-char.
+ // The & 0x80 check is to check for the trailing byte of a utf-8 + if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { + ++Current; + ++Column; + } else + break; + } else { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + return StringRef(start, Current - start); +} + +bool Scanner::consume(uint32_t Expected) { + if (Expected >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (Current == End) + return false; + if (uint8_t(*Current) >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (uint8_t(*Current) == Expected) { + ++Current; + ++Column; + return true; + } + return false; +} + +void Scanner::skip(uint32_t Distance) { + Current += Distance; + Column += Distance; +} + +bool Scanner::isBlankOrBreak(StringRef::iterator Position) { + if (Position == End) + return false; + if ( *Position == ' ' || *Position == '\t' + || *Position == '\r' || *Position == '\n') + return true; + return false; +} + +void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired) { + if (IsSimpleKeyAllowed) { + SimpleKey SK; + SK.Tok = Tok; + SK.Line = Line; + SK.Column = AtColumn; + SK.IsRequired = IsRequired; + SK.FlowLevel = FlowLevel; + SimpleKeys.push_back(SK); + } +} + +void Scanner::removeStaleSimpleKeyCandidates() { + for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin(); + i != SimpleKeys.end();) { + if (i->Line != Line || i->Column + 1024 < Column) { + if (i->IsRequired) + setError( "Could not find expected : for simple key" + , i->Tok->Range.begin()); + i = SimpleKeys.erase(i); + } else + ++i; + } +} + +void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { + if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) + SimpleKeys.pop_back(); +} + +bool Scanner::unrollIndent(int ToColumn) { + Token T; + // Indentation is ignored in flow. + if (FlowLevel != 0) + return true; + + while (Indent > ToColumn) { + T.Kind = Token::TK_BlockEnd; + T.Range = StringRef(Current, 1); + TokenQueue.push_back(T); + Indent = Indents.pop_back_val(); + } + + return true; +} + +bool Scanner::rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint) { + if (FlowLevel) + return true; + if (Indent < ToColumn) { + Indents.push_back(Indent); + Indent = ToColumn; + + Token T; + T.Kind = Kind; + T.Range = StringRef(Current, 0); + TokenQueue.insert(InsertPoint, T); + } + return true; +} + +void Scanner::scanToNextToken() { + while (true) { + while (*Current == ' ' || *Current == '\t') { + skip(1); + } + + // Skip comment. + if (*Current == '#') { + while (true) { + // This may skip more than one byte, thus Column is only incremented + // for code points. + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + + // Skip EOL. + StringRef::iterator i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + ++Line; + Column = 0; + // New lines may start a simple key. + if (!FlowLevel) + IsSimpleKeyAllowed = true; + } +} + +bool Scanner::scanStreamStart() { + IsStartOfStream = false; + + EncodingInfo EI = getUnicodeEncoding(currentInput()); + + Token T; + T.Kind = Token::TK_StreamStart; + T.Range = StringRef(Current, EI.second); + TokenQueue.push_back(T); + Current += EI.second; + return true; +} + +bool Scanner::scanStreamEnd() { + // Force an ending new line if one isn't present. 
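+  // The stream-end token should begin its own line at column 0, so
+  // synthesize a final line break if the input did not end with one.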
+ if (Column != 0) { + Column = 0; + ++Line; + } + + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = Token::TK_StreamEnd; + T.Range = StringRef(Current, 0); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanDirective() { + // Reset the indentation level. + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + StringRef::iterator Start = Current; + consume('%'); + StringRef::iterator NameStart = Current; + Current = skip_while(&Scanner::skip_ns_char, Current); + StringRef Name(NameStart, Current - NameStart); + Current = skip_while(&Scanner::skip_s_white, Current); + + if (Name == "YAML") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Token T; + T.Kind = Token::TK_VersionDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; + } + return false; +} + +bool Scanner::scanDocumentIndicator(bool IsStart) { + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; + T.Range = StringRef(Current, 3); + skip(3); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanFlowCollectionStart(bool IsSequence) { + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceStart + : Token::TK_FlowMappingStart; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + + // [ and { may begin a simple key. + saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); + + // And may also be followed by a simple key. + IsSimpleKeyAllowed = true; + ++FlowLevel; + return true; +} + +bool Scanner::scanFlowCollectionEnd(bool IsSequence) { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = false; + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceEnd + : Token::TK_FlowMappingEnd; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + if (FlowLevel) + --FlowLevel; + return true; +} + +bool Scanner::scanFlowEntry() { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_FlowEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanBlockEntry() { + rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_BlockEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanKey() { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = !FlowLevel; + + Token T; + T.Kind = Token::TK_Key; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanValue() { + // If the previous token could have been a simple key, insert the key token + // into the token queue. + if (!SimpleKeys.empty()) { + SimpleKey SK = SimpleKeys.pop_back_val(); + Token T; + T.Kind = Token::TK_Key; + T.Range = SK.Tok->Range; + TokenQueueT::iterator i, e; + for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { + if (i == SK.Tok) + break; + } + assert(i != e && "SimpleKey not in token queue!"); + i = TokenQueue.insert(i, T); + + // We may also need to add a Block-Mapping-Start token. 
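+      // The mapping begins at the saved key's column, not at the ':'
+      // currently being scanned, so the indent is rolled to SK.Column
+      // rather than Column.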
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i); + + IsSimpleKeyAllowed = false; + } else { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + IsSimpleKeyAllowed = !FlowLevel; + } + + Token T; + T.Kind = Token::TK_Value; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, + StringRef::iterator Position) { + assert(Position - 1 >= First); + StringRef::iterator I = Position - 1; + // We calculate the number of consecutive '\'s before the current position + // by iterating backwards through our string. + while (I >= First && *I == '\\') --I; + // (Position - 1 - I) now contains the number of '\'s before the current + // position. If it is odd, the character at 'Position' was escaped. + return (Position - 1 - I) % 2 == 1; +} + +bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + if (IsDoubleQuoted) { + do { + ++Current; + while (Current != End && *Current != '"') + ++Current; + // Repeat until the previous character was not a '\' or was an escaped + // backslash. + } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); + } else { + skip(1); + while (true) { + // Skip a ' followed by another '. + if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { + skip(2); + continue; + } else if (*Current == '\'') + break; + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + Column = 0; + ++Line; + } else { + if (i == End) + break; + Current = i; + ++Column; + } + } + } + skip(1); // Skip ending quote. + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanPlainScalar() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + unsigned LeadingBlanks = 0; + assert(Indent >= -1 && "Indent must be >= -1 !"); + unsigned indent = static_cast<unsigned>(Indent + 1); + while (true) { + if (*Current == '#') + break; + + while (!isBlankOrBreak(Current)) { + if ( FlowLevel && *Current == ':' + && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { + setError("Found unexpected ':' while scanning a plain scalar", Current); + return false; + } + + // Check for the end of the plain scalar. + if ( (*Current == ':' && isBlankOrBreak(Current + 1)) + || ( FlowLevel + && (StringRef(Current, 1).find_first_of(",:?[]{}") + != StringRef::npos))) + break; + + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + // Are we at the end? + if (!isBlankOrBreak(Current)) + break; + + // Eat blanks. 
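+    // Scan ahead using Tmp rather than Current so the whitespace is only
+    // consumed if the scalar actually continues past it.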
+ StringRef::iterator Tmp = Current; + while (isBlankOrBreak(Tmp)) { + StringRef::iterator i = skip_s_white(Tmp); + if (i != Tmp) { + if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { + setError("Found invalid tab character in indentation", Tmp); + return false; + } + Tmp = i; + ++Column; + } else { + i = skip_b_break(Tmp); + if (!LeadingBlanks) + LeadingBlanks = 1; + Tmp = i; + Column = 0; + ++Line; + } + } + + if (!FlowLevel && Column < indent) + break; + + Current = Tmp; + } + if (Start == Current) { + setError("Got empty plain scalar", Start); + return false; + } + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Plain scalars can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanAliasOrAnchor(bool IsAlias) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); + while(true) { + if ( *Current == '[' || *Current == ']' + || *Current == '{' || *Current == '}' + || *Current == ',' + || *Current == ':') + break; + StringRef::iterator i = skip_ns_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty alias or anchor", Start); + return false; + } + + Token T; + T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Alias and anchors can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanBlockScalar(bool IsLiteral) { + StringRef::iterator Start = Current; + skip(1); // Eat | or > + while(true) { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + if (Column == 0) + break; + i = skip_b_break(Current); + if (i != Current) { + // We got a line break. + Column = 0; + ++Line; + Current = i; + continue; + } else { + // There was an error, which should already have been printed out. + return false; + } + } + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty block scalar", Start); + return false; + } + + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanTag() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); // Eat !. + if (Current == End || isBlankOrBreak(Current)); // An empty tag. + else if (*Current == '<') { + skip(1); + scan_ns_uri_char(); + if (!consume('>')) + return false; + } else { + // FIXME: Actually parse the c-ns-shorthand-tag rule. + Current = skip_while(&Scanner::skip_ns_char, Current); + } + + Token T; + T.Kind = Token::TK_Tag; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Tags can be simple keys. 
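+  // (e.g. "!!str foo: bar", where the key node begins at the tag.)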
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::fetchMoreTokens() { + if (IsStartOfStream) + return scanStreamStart(); + + scanToNextToken(); + + if (Current == End) + return scanStreamEnd(); + + removeStaleSimpleKeyCandidates(); + + unrollIndent(Column); + + if (Column == 0 && *Current == '%') + return scanDirective(); + + if (Column == 0 && Current + 4 <= End + && *Current == '-' + && *(Current + 1) == '-' + && *(Current + 2) == '-' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(true); + + if (Column == 0 && Current + 4 <= End + && *Current == '.' + && *(Current + 1) == '.' + && *(Current + 2) == '.' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(false); + + if (*Current == '[') + return scanFlowCollectionStart(true); + + if (*Current == '{') + return scanFlowCollectionStart(false); + + if (*Current == ']') + return scanFlowCollectionEnd(true); + + if (*Current == '}') + return scanFlowCollectionEnd(false); + + if (*Current == ',') + return scanFlowEntry(); + + if (*Current == '-' && isBlankOrBreak(Current + 1)) + return scanBlockEntry(); + + if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanKey(); + + if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanValue(); + + if (*Current == '*') + return scanAliasOrAnchor(true); + + if (*Current == '&') + return scanAliasOrAnchor(false); + + if (*Current == '!') + return scanTag(); + + if (*Current == '|' && !FlowLevel) + return scanBlockScalar(true); + + if (*Current == '>' && !FlowLevel) + return scanBlockScalar(false); + + if (*Current == '\'') + return scanFlowScalar(false); + + if (*Current == '"') + return scanFlowScalar(true); + + // Get a plain scalar. + StringRef FirstChar(Current, 1); + if (!(isBlankOrBreak(Current) + || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) + || (*Current == '-' && !isBlankOrBreak(Current + 1)) + || (!FlowLevel && (*Current == '?' || *Current == ':') + && isBlankOrBreak(Current + 1)) + || (!FlowLevel && *Current == ':' + && Current + 2 < End + && *(Current + 1) == ':' + && !isBlankOrBreak(Current + 2))) + return scanPlainScalar(); + + setError("Unrecognized character while tokenizing."); + return false; +} + +Stream::Stream(StringRef Input, SourceMgr &SM) + : scanner(new Scanner(Input, SM)) + , CurrentDoc(0) {} + +Stream::~Stream() {} + +bool Stream::failed() { return scanner->failed(); } + +void Stream::printError(Node *N, const Twine &Msg) { + SmallVector<SMRange, 1> Ranges; + Ranges.push_back(N->getSourceRange()); + scanner->printError( N->getSourceRange().Start + , SourceMgr::DK_Error + , Msg + , Ranges); +} + +void Stream::handleYAMLDirective(const Token &t) { + // TODO: Ensure version is 1.x. +} + +document_iterator Stream::begin() { + if (CurrentDoc) + report_fatal_error("Can only iterate over the stream once"); + + // Skip Stream-Start. 
+ scanner->getNext(); + + CurrentDoc.reset(new Document(*this)); + return document_iterator(CurrentDoc); +} + +document_iterator Stream::end() { + return document_iterator(); +} + +void Stream::skip() { + for (document_iterator i = begin(), e = end(); i != e; ++i) + i->skip(); +} + +Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A) + : Doc(D) + , TypeID(Type) + , Anchor(A) { + SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); + SourceRange = SMRange(Start, Start); +} + +Token &Node::peekNext() { + return Doc->peekNext(); +} + +Token Node::getNext() { + return Doc->getNext(); +} + +Node *Node::parseBlockNode() { + return Doc->parseBlockNode(); +} + +BumpPtrAllocator &Node::getAllocator() { + return Doc->NodeAllocator; +} + +void Node::setError(const Twine &Msg, Token &Tok) const { + Doc->setError(Msg, Tok); +} + +bool Node::failed() const { + return Doc->failed(); +} + + + +StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const { + // TODO: Handle newlines properly. We need to remove leading whitespace. + if (Value[0] == '"') { // Double quoted. + // Pull off the leading and trailing "s. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + // Search for characters that would require unescaping the value. + StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); + if (i != StringRef::npos) + return unescapeDoubleQuoted(UnquotedValue, i, Storage); + return UnquotedValue; + } else if (Value[0] == '\'') { // Single quoted. + // Pull off the leading and trailing 's. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + StringRef::size_type i = UnquotedValue.find('\''); + if (i != StringRef::npos) { + // We're going to need Storage. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + Storage.push_back('\''); + UnquotedValue = UnquotedValue.substr(i + 2); + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); + } + return UnquotedValue; + } + // Plain or block. + size_t trimtrail = Value.rfind(' '); + return Value.drop_back( + trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); +} + +StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type i + , SmallVectorImpl<char> &Storage) + const { + // Use Storage to build proper value. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { + // Insert all previous chars into Storage. + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + // Chop off inserted chars. + UnquotedValue = UnquotedValue.substr(i); + + assert(!UnquotedValue.empty() && "Can't be empty!"); + + // Parse escape or line break. + switch (UnquotedValue[0]) { + case '\r': + case '\n': + Storage.push_back('\n'); + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + UnquotedValue = UnquotedValue.substr(1); + break; + default: + if (UnquotedValue.size() == 1) + // TODO: Report error. 
+ break; + UnquotedValue = UnquotedValue.substr(1); + switch (UnquotedValue[0]) { + default: { + Token T; + T.Range = StringRef(UnquotedValue.begin(), 1); + setError("Unrecognized escape code!", T); + return ""; + } + case '\r': + case '\n': + // Remove the new line. + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + // If this was just a single byte newline, it will get skipped + // below. + break; + case '0': + Storage.push_back(0x00); + break; + case 'a': + Storage.push_back(0x07); + break; + case 'b': + Storage.push_back(0x08); + break; + case 't': + case 0x09: + Storage.push_back(0x09); + break; + case 'n': + Storage.push_back(0x0A); + break; + case 'v': + Storage.push_back(0x0B); + break; + case 'f': + Storage.push_back(0x0C); + break; + case 'r': + Storage.push_back(0x0D); + break; + case 'e': + Storage.push_back(0x1B); + break; + case ' ': + Storage.push_back(0x20); + break; + case '"': + Storage.push_back(0x22); + break; + case '/': + Storage.push_back(0x2F); + break; + case '\\': + Storage.push_back(0x5C); + break; + case 'N': + encodeUTF8(0x85, Storage); + break; + case '_': + encodeUTF8(0xA0, Storage); + break; + case 'L': + encodeUTF8(0x2028, Storage); + break; + case 'P': + encodeUTF8(0x2029, Storage); + break; + case 'x': { + if (UnquotedValue.size() < 3) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(2); + break; + } + case 'u': { + if (UnquotedValue.size() < 5) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(4); + break; + } + case 'U': { + if (UnquotedValue.size() < 9) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(8); + break; + } + } + UnquotedValue = UnquotedValue.substr(1); + } + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); +} + +Node *KeyValueNode::getKey() { + if (Key) + return Key; + // Handle implicit null keys. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_Value + || t.Kind == Token::TK_Error) { + return Key = new (getAllocator()) NullNode(Doc); + } + if (t.Kind == Token::TK_Key) + getNext(); // skip TK_Key. + } + + // Handle explicit null keys. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { + return Key = new (getAllocator()) NullNode(Doc); + } + + // We've got a normal key. + return Key = parseBlockNode(); +} + +Node *KeyValueNode::getValue() { + if (Value) + return Value; + getKey()->skip(); + if (failed()) + return Value = new (getAllocator()) NullNode(Doc); + + // Handle implicit null values. 
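+  // (e.g. an explicit "? key" entry with no ": value" part, or a flow
+  //  mapping such as "{a, b}".)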
+ { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_FlowMappingEnd + || t.Kind == Token::TK_Key + || t.Kind == Token::TK_FlowEntry + || t.Kind == Token::TK_Error) { + return Value = new (getAllocator()) NullNode(Doc); + } + + if (t.Kind != Token::TK_Value) { + setError("Unexpected token in Key Value.", t); + return Value = new (getAllocator()) NullNode(Doc); + } + getNext(); // skip TK_Value. + } + + // Handle explicit null values. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { + return Value = new (getAllocator()) NullNode(Doc); + } + + // We got a normal value. + return Value = parseBlockNode(); +} + +void MappingNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) { + CurrentEntry->skip(); + if (Type == MT_Inline) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + } + Token T = peekNext(); + if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { + // KeyValueNode eats the TK_Key. That way it can detect null keys. + CurrentEntry = new (getAllocator()) KeyValueNode(Doc); + } else if (Type == MT_Block) { + switch (T.Kind) { + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError("Unexpected token. Expected Key or Block End", T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + return increment(); + case Token::TK_FlowMappingEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Key, Flow Entry, or Flow " + "Mapping End." + , T); + IsAtEnd = true; + CurrentEntry = 0; + } + } +} + +void SequenceNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) + CurrentEntry->skip(); + Token T = peekNext(); + if (SeqType == ST_Block) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Block Entry or Block End." + , T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Indentless) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + default: + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Flow) { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + WasPreviousTokenFlowEntry = true; + return increment(); + case Token::TK_FlowSequenceEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + case Token::TK_StreamEnd: + case Token::TK_DocumentEnd: + case Token::TK_DocumentStart: + setError("Could not find closing ]!", T); + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + if (!WasPreviousTokenFlowEntry) { + setError("Expected , between entries!", T); + IsAtEnd = true; + CurrentEntry = 0; + break; + } + // Otherwise it must be a flow entry. 
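+      // Parse it as a node and clear the flag so that the next entry
+      // requires a separating ','.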
+ CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { + IsAtEnd = true; + } + WasPreviousTokenFlowEntry = false; + break; + } + } +} + +Document::Document(Stream &S) : stream(S), Root(0) { + if (parseDirectives()) + expectToken(Token::TK_DocumentStart); + Token &T = peekNext(); + if (T.Kind == Token::TK_DocumentStart) + getNext(); +} + +bool Document::skip() { + if (stream.scanner->failed()) + return false; + if (!Root) + getRoot(); + Root->skip(); + Token &T = peekNext(); + if (T.Kind == Token::TK_StreamEnd) + return false; + if (T.Kind == Token::TK_DocumentEnd) { + getNext(); + return skip(); + } + return true; +} + +Token &Document::peekNext() { + return stream.scanner->peekNext(); +} + +Token Document::getNext() { + return stream.scanner->getNext(); +} + +void Document::setError(const Twine &Message, Token &Location) const { + stream.scanner->setError(Message, Location.Range.begin()); +} + +bool Document::failed() const { + return stream.scanner->failed(); +} + +Node *Document::parseBlockNode() { + Token T = peekNext(); + // Handle properties. + Token AnchorInfo; +parse_property: + switch (T.Kind) { + case Token::TK_Alias: + getNext(); + return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); + case Token::TK_Anchor: + if (AnchorInfo.Kind == Token::TK_Anchor) { + setError("Already encountered an anchor for this node!", T); + return 0; + } + AnchorInfo = getNext(); // Consume TK_Anchor. + T = peekNext(); + goto parse_property; + case Token::TK_Tag: + getNext(); // Skip TK_Tag. + T = peekNext(); + goto parse_property; + default: + break; + } + + switch (T.Kind) { + case Token::TK_BlockEntry: + // We got an unindented BlockEntry sequence. This is not terminated with + // a BlockEnd. + // Don't eat the TK_BlockEntry, SequenceNode needs it. + return new (NodeAllocator) SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Indentless); + case Token::TK_BlockSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Block); + case Token::TK_BlockMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Block); + case Token::TK_FlowSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Flow); + case Token::TK_FlowMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Flow); + case Token::TK_Scalar: + getNext(); + return new (NodeAllocator) + ScalarNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , T.Range); + case Token::TK_Key: + // Don't eat the TK_Key, KeyValueNode expects it. + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Inline); + case Token::TK_DocumentStart: + case Token::TK_DocumentEnd: + case Token::TK_StreamEnd: + default: + // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not + // !!null null. 
+ return new (NodeAllocator) NullNode(stream.CurrentDoc); + case Token::TK_Error: + return 0; + } + llvm_unreachable("Control flow shouldn't reach here."); + return 0; +} + +bool Document::parseDirectives() { + bool isDirective = false; + while (true) { + Token T = peekNext(); + if (T.Kind == Token::TK_TagDirective) { + handleTagDirective(getNext()); + isDirective = true; + } else if (T.Kind == Token::TK_VersionDirective) { + stream.handleYAMLDirective(getNext()); + isDirective = true; + } else + break; + } + return isDirective; +} + +bool Document::expectToken(int TK) { + Token T = getNext(); + if (T.Kind != TK) { + setError("Unexpected token", T); + return false; + } + return true; +} + +OwningPtr<Document> document_iterator::NullDoc; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 72d3986..86cdca1 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -633,6 +633,19 @@ raw_ostream &raw_fd_ostream::resetColor() { return *this; } +raw_ostream &raw_fd_ostream::reverseColor() { + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = sys::Process::OutputReverse(); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // don't account colors towards output characters + pos -= len; + } + return *this; +} + bool raw_fd_ostream::is_displayed() const { return sys::Process::FileDescriptorIsDisplayed(FD); } diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index 5071ee7..1463b68 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -20,6 +20,22 @@ namespace llvm { SourceMgr SrcMgr; +void PrintWarning(SMLoc WarningLoc, const Twine &Msg) { + SrcMgr.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +} + +void PrintWarning(const char *Loc, const Twine &Msg) { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Warning, Msg); +} + +void PrintWarning(const Twine &Msg) { + errs() << "warning:" << Msg << "\n"; +} + +void PrintWarning(const TGError &Warning) { + PrintWarning(Warning.getLoc(), Warning.getMessage()); +} + void PrintError(SMLoc ErrorLoc, const Twine &Msg) { SrcMgr.PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg); } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index b05fe62..9b0cb0c 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -38,9 +38,6 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable NEON instructions", [FeatureVFP3]>; -def FeatureNEON2 : SubtargetFeature<"neon2", "HasNEON2", "true", - "Enable Advanced SIMD2 instructions", - [FeatureNEON]>; def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", @@ -76,8 +73,6 @@ def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", "true", "Use NEON for single precision FP">; -// Allow more precision in FP computation -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; // Disable 32-bit to 16-bit narrowing for experimentation. 
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ca30716..410790a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -16,7 +16,6 @@ #include "ARMAsmPrinter.h" #include "ARM.h" #include "ARMBuildAttrs.h" -#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" @@ -35,7 +34,6 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectStreamer.h" @@ -44,8 +42,6 @@ #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -732,8 +728,9 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasNEON() && emitFPU) { /* NEON is not exactly a VFP architecture, but GAS emit one of * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasNEON2()) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon-vfpv4"); + if (Subtarget->hasVFP4()) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + "neon-vfpv4"); else AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); /* If emitted for NEON, omit from VFP below, since you can have both @@ -1270,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } // Darwin call instructions are just normal call instructions with different // clobber semantics (they clobber R9). - case ARM::BXr9_CALL: case ARM::BX_CALL: { { MCInst TmpInst; @@ -1292,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::tBXr9_CALL: case ARM::tBX_CALL: { { MCInst TmpInst; @@ -1315,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCRXr9_CALL: case ARM::BMOVPCRX_CALL: { { MCInst TmpInst; @@ -1343,7 +1337,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::BMOVPCBr9_CALL: case ARM::BMOVPCB_CALL: { { MCInst TmpInst; @@ -1371,7 +1364,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } - case ARM::t2BMOVPCBr9_CALL: case ARM::t2BMOVPCB_CALL: { { MCInst TmpInst; @@ -1984,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } { MCInst TmpInst; - TmpInst.setOpcode(ARM::tLDRr); + TmpInst.setOpcode(ARM::tLDRi); TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); - TmpInst.addOperand(MCOperand::CreateReg(0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); // Predicate. TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::CreateReg(0)); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 4b276c5..af3f75a 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -107,7 +107,7 @@ public: if (!Subtarget->isTargetDarwin()) return 0; return Subtarget->isThumb() ? 
- llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm; + ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; } MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 366e2fa..c6280f8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -13,10 +13,10 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/Function.h" @@ -680,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place. - if (ARM::QQPRRegClass.contains(DestReg, SrcReg) || - ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + // Handle register classes that require multiple instructions. + unsigned BeginIdx = 0; + unsigned SubRegs = 0; + unsigned Spacing = 1; + + // Use VORRq when possible. + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + // Fall back to VMOVD. + else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; + else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; + else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; + + else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; + else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; + else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + + if (Opc) { const TargetRegisterInfo *TRI = &getRegisterInfo(); - assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum."); - unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ? - ARM::qsub_1 : ARM::qsub_3; - for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, i); - unsigned Src = TRI->getSubReg(SrcReg, i); - MachineInstrBuilder Mov = - AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq)) - .addReg(Dst, RegState::Define) - .addReg(Src, getKillRegState(KillSrc)) - .addReg(Src, getKillRegState(KillSrc))); - if (i == EndSubReg) { - Mov->addRegisterDefined(DestReg, TRI); - if (KillSrc) - Mov->addRegisterKilled(SrcReg, TRI); - } + MachineInstrBuilder Mov; + for (unsigned i = 0; i != SubRegs; ++i) { + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + assert(Dst && Src && "Bad sub-register"); + Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) + .addReg(Src)); + // VORR takes two source operands. + if (Opc == ARM::VORRq) + Mov.addReg(Src); } + // Add implicit super-register defs and kills to the last instruction. 
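+    // Mov still refers to the final sub-register copy, so attaching the
+    // implicit operands there marks the whole DestReg as defined (and
+    // SrcReg as killed) only once the entire sequence has executed.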
+ Mov->addRegisterDefined(DestReg, TRI); + if (KillSrc) + Mov->addRegisterKilled(SrcReg, TRI); return; } + llvm_unreachable("Impossible reg-to-reg copy"); } @@ -757,7 +779,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { // Use aligned spills if the stack can be realigned. if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) @@ -907,7 +929,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Unknown reg class!"); break; case 16: - if (ARM::QPRRegClass.hasSubClassEq(RC)) { + if (ARM::DPairRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) .addFrameIndex(FI).addImm(16) @@ -1478,6 +1500,29 @@ int llvm::getMatchingCondBranchOpcode(int Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } +/// commuteInstruction - Handle commutable instructions. +MachineInstr * +ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { + switch (MI->getOpcode()) { + case ARM::MOVCCr: + case ARM::t2MOVCCr: { + // MOVCC can be commuted by inverting the condition. + unsigned PredReg = 0; + ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); + // MOVCC AL can't be inverted. Shouldn't happen. + if (CC == ARMCC::AL || PredReg != ARM::CPSR) + return NULL; + MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI); + if (!MI) + return NULL; + // After swapping the MOVCC operands, also invert the condition. + MI->getOperand(MI->findFirstPredOperandIdx()) + .setImm(ARMCC::getOppositeCondition(CC)); + return MI; + } + } + return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); +} /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR @@ -1916,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, if (!MRI->hasOneNonDBGUse(Reg)) return false; + const MCInstrDesc &DefMCID = DefMI->getDesc(); + if (DefMCID.hasOptionalDef()) { + unsigned NumOps = DefMCID.getNumOperands(); + const MachineOperand &MO = DefMI->getOperand(NumOps-1); + if (MO.getReg() == ARM::CPSR && !MO.isDead()) + // If DefMI defines CPSR and it is not dead, it's obviously not safe + // to delete DefMI. + return false; + } + + const MCInstrDesc &UseMCID = UseMI->getDesc(); + if (UseMCID.hasOptionalDef()) { + unsigned NumOps = UseMCID.getNumOperands(); + if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR) + // If the instruction sets the flag, do not attempt this optimization + // since it may change the semantics of the code. 
+ return false; + } + unsigned UseOpc = UseMI->getOpcode(); unsigned NewUseOpc = 0; uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 314e317..2fe8507 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -139,6 +139,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + MachineInstr *commuteInstruction(MachineInstr*, bool=false) const; + virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 291369f..3907f75 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -15,7 +15,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMFrameLowering.h" -#include "ARMInstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 2b9c55d..0bd1c3e 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "llvm/CallingConv.h" #include "llvm/CodeGen/CallingConvLower.h" diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index d33364b..b9a2512 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -9,10 +9,6 @@ // This describes the calling conventions for ARM architecture. //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. 
-class CCIfSubtarget<string F, CCAction A>: - CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>; - /// CCIfAlign - Match of the original alignment of the arg class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e48d07a..bc681be 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,7 +15,7 @@ #define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMConstantPoolValue.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" @@ -46,7 +46,7 @@ namespace { class ARMCodeEmitter : public MachineFunctionPass { ARMJITInfo *JTI; - const ARMInstrInfo *II; + const ARMBaseInstrInfo *II; const TargetData *TD; const ARMSubtarget *Subtarget; TargetMachine &TM; @@ -66,7 +66,7 @@ namespace { public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const ARMInstrInfo *)tm.getInstrInfo()), + II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} @@ -383,9 +383,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); - II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); - TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); + JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo(); + II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo(); + TD = MF.getTarget().getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -917,9 +917,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscBranchInstruction(MI); break; case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: - case ARM::BXr9_CALL: - case ARM::BMOVPCRXr9_CALL: { + case ARM::BMOVPCRX_CALL: { // First emit mov lr, pc unsigned Binary = 0x01a0e00f; Binary |= II->getPredicate(&MI) << ARMII::CondShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 2cdfd1e..fc35c7c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -16,12 +16,12 @@ #define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" -#include "ARMInstrInfo.h" #include "Thumb2InstrInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" @@ -209,8 +209,9 @@ namespace { } /// getMaxDisp - Returns the maximum displacement supported by MI. /// Correct for unknown alignment. + /// Conservatively subtract 2 bytes to handle weird alignment effects. unsigned getMaxDisp() const { - return KnownAlignment ? MaxDisp : MaxDisp - 2; + return (KnownAlignment ? 
MaxDisp : MaxDisp - 2) - 2; } }; @@ -266,7 +267,7 @@ namespace { MachineFunction *MF; MachineConstantPool *MCP; - const ARMInstrInfo *TII; + const ARMBaseInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; bool isThumb; @@ -283,51 +284,52 @@ namespace { } private: - void DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); - void JumpTableFunctionScan(); - void InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs); - MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); - void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); - void AdjustBBOffsetsAfter(MachineBasicBlock *BB); - bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); - int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); - void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + void scanFunctionJumpTables(); + void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI); + void updateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void adjustBBOffsetsAfter(MachineBasicBlock *BB); + bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI); + int findInRangeCPEntry(CPUser& U, unsigned UserOffset); + bool findAvailableWater(CPUser&U, unsigned UserOffset, + water_iterator &WaterIter); + void createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); - bool HandleConstantPoolUser(unsigned CPUserIndex); - void RemoveDeadCPEMI(MachineInstr *CPEMI); - bool RemoveUnusedCPEntries(); - bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, - MachineInstr *CPEMI, unsigned Disp, bool NegOk, - bool DoDump = false); - bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, + bool handleConstantPoolUser(unsigned CPUserIndex); + void removeDeadCPEMI(MachineInstr *CPEMI); + bool removeUnusedCPEntries(); + bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, bool NegOk, + bool DoDump = false); + bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water, CPUser &U, unsigned &Growth); - bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); - bool FixUpImmediateBr(ImmBranch &Br); - bool FixUpConditionalBr(ImmBranch &Br); - bool FixUpUnconditionalBr(ImmBranch &Br); - bool UndoLRSpillRestore(); + bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool fixupImmediateBr(ImmBranch &Br); + bool fixupConditionalBr(ImmBranch &Br); + bool fixupUnconditionalBr(ImmBranch &Br); + bool undoLRSpillRestore(); bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const; - bool OptimizeThumb2Instructions(); - bool OptimizeThumb2Branches(); - bool ReorderThumb2JumpTables(); - bool OptimizeThumb2JumpTables(); - MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + bool optimizeThumb2Instructions(); + bool optimizeThumb2Branches(); + bool reorderThumb2JumpTables(); + bool optimizeThumb2JumpTables(); + MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB); - void ComputeBlockSize(MachineBasicBlock *MBB); - unsigned GetOffsetOf(MachineInstr *MI) const; - unsigned GetUserOffset(CPUser&) const; + void computeBlockSize(MachineBasicBlock *MBB); + unsigned getOffsetOf(MachineInstr 
*MI) const; + unsigned getUserOffset(CPUser&) const; void dumpBBs(); void verify(); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned Disp, bool NegativeOK, bool IsSoImm = false); - bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, const CPUser &U) { - return OffsetIsInRange(UserOffset, TrialOffset, + return isOffsetInRange(UserOffset, TrialOffset, U.getMaxDisp(), U.NegOk, U.IsSoImm); } }; @@ -345,11 +347,21 @@ void ARMConstantIslands::verify() { assert(BBInfo[MBBId].Offset % (1u << Align) == 0); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } + DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n"); for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { CPUser &U = CPUsers[i]; - unsigned UserOffset = GetUserOffset(U); - assert(CPEIsInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp(), U.NegOk) && - "Constant pool entry out of range!"); + unsigned UserOffset = getUserOffset(U); + // Verify offset using the real max displacement without the safety + // adjustment. + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk, + /* DoDump = */ true)) { + DEBUG(dbgs() << "OK\n"); + continue; + } + DEBUG(dbgs() << "Out of range.\n"); + dumpBBs(); + DEBUG(MF->dump()); + llvm_unreachable("Constant pool entry out of range!"); } #endif } @@ -382,7 +394,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -392,6 +404,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { HasFarJump = false; + // This pass invalidates liveness information when it splits basic blocks. + MF->getRegInfo().invalidateLiveness(); + // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. MF->RenumberBlocks(); @@ -400,8 +415,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // of the TB[BH] instructions. bool MadeChange = false; if (isThumb2 && AdjustJumpTableBlocks) { - JumpTableFunctionScan(); - MadeChange |= ReorderThumb2JumpTables(); + scanFunctionJumpTables(); + MadeChange |= reorderThumb2JumpTables(); // Data is out of date, so clear it. It'll be re-computed later. T2JumpTables.clear(); // Blocks may have shifted around. Keep the numbering up to date. @@ -419,7 +434,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // we put them all at the end of the function. std::vector<MachineInstr*> CPEMIs; if (!MCP->isEmpty()) - DoInitialPlacement(CPEMIs); + doInitialPlacement(CPEMIs); /// The next UID to take is the first unused one. AFI->initPICLabelUId(CPEMIs.size()); @@ -427,13 +442,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Do the initial scan of the function, building up information about the // sizes of each block, the location of all the water, and finding all of the // constant pool users. - InitialFunctionScan(CPEMIs); + initializeFunctionInfo(CPEMIs); CPEMIs.clear(); DEBUG(dumpBBs()); /// Remove dead constant pool entries. 
- MadeChange |= RemoveUnusedCPEntries(); + MadeChange |= removeUnusedCPEntries(); // Iteratively place constant pool entries and fix up branches until there // is no change. @@ -442,7 +457,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n'); bool CPChange = false; for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) - CPChange |= HandleConstantPoolUser(i); + CPChange |= handleConstantPoolUser(i); if (CPChange && ++NoCPIters > 30) report_fatal_error("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); @@ -454,7 +469,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n'); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) - BRChange |= FixUpImmediateBr(ImmBranches[i]); + BRChange |= fixupImmediateBr(ImmBranches[i]); if (BRChange && ++NoBRIters > 30) report_fatal_error("Branch Fix Up pass failed to converge!"); DEBUG(dumpBBs()); @@ -466,7 +481,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // Shrink 32-bit Thumb2 branch, load, and store instructions. if (isThumb2 && !STI->prefers32BitThumb()) - MadeChange |= OptimizeThumb2Instructions(); + MadeChange |= optimizeThumb2Instructions(); // After a while, this might be made debug-only, but it is not expensive. verify(); @@ -474,7 +489,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { // If LR has been forced spilled and no far jump (i.e. BL) has been issued, // undo the spill / restore of LR if possible. if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) - MadeChange |= UndoLRSpillRestore(); + MadeChange |= undoLRSpillRestore(); // Save the mapping between original and cloned constpool entries. for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { @@ -497,10 +512,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { return MadeChange; } -/// DoInitialPlacement - Perform the initial placement of the constant pool +/// doInitialPlacement - Perform the initial placement of the constant pool /// entries. To start with, we put them all at the end of the function. void -ARMConstantIslands::DoInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { +ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // Create the basic block to hold the CPE's. MachineBasicBlock *BB = MF->CreateMachineBasicBlock(); MF->push_back(BB); @@ -610,10 +625,10 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) { return Log2_32(Align); } -/// JumpTableFunctionScan - Do a scan of the function, building up +/// scanFunctionJumpTables - Do a scan of the function, building up /// information about the sizes of each block and the locations of all /// the jump tables. -void ARMConstantIslands::JumpTableFunctionScan() { +void ARMConstantIslands::scanFunctionJumpTables() { for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { MachineBasicBlock &MBB = *MBBI; @@ -625,11 +640,11 @@ void ARMConstantIslands::JumpTableFunctionScan() { } } -/// InitialFunctionScan - Do the initial scan of the function, building up +/// initializeFunctionInfo - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. 
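// How the pieces below fit together (an illustrative gloss; block numbers and the constant are made up): "water" is any point in the block layout where a constant-pool island can be placed, and an "island" is a basic block holding nothing but CPE data. Roughly:
//
//   BB7:   ldr r0, [pc, #imm]   ; CP user with a limited displacement
//   ...
//   BB9:   b   BB10             ; the end of BB9 is "water"
//   CPE:   .word 0x12345678     ; island dropped into that water
//   BB10:  ...
//
// WaterList tracks the candidate blocks; NewWaterList remembers water the pass created itself by splitting blocks.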
void ARMConstantIslands:: -InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { +initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) { BBInfo.clear(); BBInfo.resize(MF->getNumBlockIDs()); @@ -638,14 +653,14 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - ComputeBlockSize(I); + computeBlockSize(I); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. - AdjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(MF->begin()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -790,9 +805,9 @@ InitialFunctionScan(const std::vector<MachineInstr*> &CPEMIs) { } } -/// ComputeBlockSize - Compute the size and some alignment information for MBB. +/// computeBlockSize - Compute the size and some alignment information for MBB. /// This function updates BBInfo directly. -void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { +void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) { BasicBlockInfo &BBI = BBInfo[MBB->getNumber()]; BBI.Size = 0; BBI.Unalign = 0; @@ -817,10 +832,10 @@ void ARMConstantIslands::ComputeBlockSize(MachineBasicBlock *MBB) { } } -/// GetOffsetOf - Return the current offset of the specified machine instruction +/// getOffsetOf - Return the current offset of the specified machine instruction /// from the start of the function. This offset changes as stuff is moved /// around inside the function. -unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { +unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); // The offset is composed of two things: the sum of the sizes of all MBB's @@ -843,10 +858,10 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS, return LHS->getNumber() < RHS->getNumber(); } -/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// updateForInsertedWaterBlock - When a block is newly inserted into the /// machine function, it upsets all of the block numbers. Renumber the blocks /// and update the arrays that parallel this numbering. -void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { +void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) { // Renumber the MBB's to keep them consecutive. NewBB->getParent()->RenumberBlocks(NewBB); @@ -866,7 +881,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { +MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); // Create a new MBB for the code after the OrigBB. @@ -897,7 +912,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { OrigBB->addSuccessor(NewBB); // Update internal data structures to account for the newly inserted MBB. 
- // This is almost the same as UpdateForInsertedWaterBlock, except that + // This is almost the same as updateForInsertedWaterBlock, except that // the Water goes after OrigBB, not NewBB. MF->RenumberBlocks(NewBB); @@ -924,23 +939,23 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // the new jump we added. (It should be possible to do this without // recounting everything, but it's very confusing, and this is rarely // executed.) - ComputeBlockSize(OrigBB); + computeBlockSize(OrigBB); // Figure out how large the NewMBB is. As the second half of the original // block, it may contain a tablejump. - ComputeBlockSize(NewBB); + computeBlockSize(NewBB); // All BBOffsets following these blocks must be modified. - AdjustBBOffsetsAfter(OrigBB); + adjustBBOffsetsAfter(OrigBB); return NewBB; } -/// GetUserOffset - Compute the offset of U.MI as seen by the hardware +/// getUserOffset - Compute the offset of U.MI as seen by the hardware /// displacement computation. Update U.KnownAlignment to match its current /// basic block location. -unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { - unsigned UserOffset = GetOffsetOf(U.MI); +unsigned ARMConstantIslands::getUserOffset(CPUser &U) const { + unsigned UserOffset = getOffsetOf(U.MI); const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()]; unsigned KnownBits = BBI.internalKnownBits(); @@ -960,13 +975,13 @@ unsigned ARMConstantIslands::GetUserOffset(CPUser &U) const { return UserOffset; } -/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool /// reference) is within MaxDisp of TrialOffset (a proposed location of a /// constant pool entry). -/// UserOffset is computed by GetUserOffset above to include PC adjustments. If +/// UserOffset is computed by getUserOffset above to include PC adjustments. If /// the mod 4 alignment of UserOffset is not known, the uncertainty must be /// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that. -bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset, unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK, bool IsSoImm) { if (UserOffset <= TrialOffset) { @@ -982,11 +997,11 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, return false; } -/// WaterIsInRange - Returns true if a CPE placed after the specified +/// isWaterInRange - Returns true if a CPE placed after the specified /// Water (a basic block) will be in range for the specific MI. /// /// Compute how much the function will grow by inserting a CPE after Water. -bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, +bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, MachineBasicBlock* Water, CPUser &U, unsigned &Growth) { unsigned CPELogAlign = getCPELogAlign(U.CPEMI); @@ -1013,7 +1028,7 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment); // If the CPE is to be inserted before the instruction, that will raise - // the offset of the instruction. Also account for unknown alignment padding + // the offset of the instruction. Also account for unknown alignment padding // in blocks between CPE and the user. 
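// Illustrative arithmetic for the adjustment below (offsets made up): if the island would start at CPEOffset 0x100 and the user sits after it at UserOffset 0x180, inserting the island pushes the user forward by Growth plus any padding whose size is not known statically, so the displacement that must fit is (0x180 + Growth + padding) - 0x100, not simply 0x80.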
if (CPEOffset < UserOffset) UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign); @@ -1021,15 +1036,15 @@ bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, // CPE fits in existing padding. Growth = 0; - return OffsetIsInRange(UserOffset, CPEOffset, U); + return isOffsetInRange(UserOffset, CPEOffset, U); } -/// CPEIsInRange - Returns true if the distance between specific MI and +/// isCPEntryInRange - Returns true if the distance between specific MI and /// specific ConstPool entry instruction can fit in MI's displacement field. -bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, +bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { - unsigned CPEOffset = GetOffsetOf(CPEMI); + unsigned CPEOffset = getOffsetOf(CPEMI); assert(CPEOffset % 4 == 0 && "Misaligned CPE"); if (DoDump) { @@ -1046,7 +1061,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, }); } - return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk); + return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk); } #ifndef NDEBUG @@ -1066,7 +1081,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) { } #endif // NDEBUG -void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { +void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { unsigned BBNum = BB->getNumber(); for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) { // Get the offset and known bits at the end of the layout predecessor. @@ -1088,17 +1103,18 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB) { } } -/// DecrementOldEntry - find the constant pool entry with index CPI +/// decrementCPEReferenceCount - find the constant pool entry with index CPI /// and instruction CPEMI, and decrement its refcount. If the refcount /// becomes 0 remove the entry and instruction. Returns true if we removed /// the entry, false if we didn't. -bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { +bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, + MachineInstr *CPEMI) { // Find the old entry. Eliminate it if it is no longer used. CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { - RemoveDeadCPEMI(CPEMI); + removeDeadCPEMI(CPEMI); CPE->CPEMI = NULL; --NumCPEs; return true; @@ -1112,13 +1128,14 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { /// 0 = no existing entry found /// 1 = entry found, and there were no code insertions or deletions /// 2 = entry found, and there were code insertions or deletions -int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; // Check to see if the CPE is already in-range. 
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, true)) { + if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk, + true)) { DEBUG(dbgs() << "In range\n"); return 1; } @@ -1133,7 +1150,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) // Removing CPEs can leave empty entries, skip if (CPEs[i].CPEMI == NULL) continue; - if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), + if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"); @@ -1149,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) CPEs[i].RefCount++; // ...and the original. If we didn't remove the old entry, none of the // addresses changed, so we don't need another pass. - return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1; } } return 0; @@ -1170,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// LookForWater - Look for an existing entry in the WaterList in which +/// findAvailableWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. /// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not @@ -1178,7 +1195,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. -bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, +bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, water_iterator &WaterIter) { if (WaterList.empty()) return false; @@ -1196,7 +1213,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, // sure to take advantage of it for all the CPEs near that block, so that // we don't insert more branches than necessary. unsigned Growth; - if (WaterIsInRange(UserOffset, WaterBB, U, Growth) && + if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || NewWaterList.count(WaterBB)) && Growth < BestGrowth) { // This is the least amount of required padding seen so far. @@ -1215,14 +1232,14 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, return BestGrowth != ~0u; } -/// CreateNewWater - No existing WaterList entry will work for +/// createNewWater - No existing WaterList entry will work for /// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the /// block is used if in range, and the conditional branch munged so control /// flow is correct. Otherwise the block is split to create a hole with an /// unconditional branch around it. In either case NewMBB is set to a /// block following which the new island can be inserted (the WaterList /// is not adjusted). 
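// Shape produced by the fall-back split path below, sketched with made-up labels (the pass manipulates MBBs, not asm text):
//
//   UserMBB:  ... ldr r0, [pc, #imm] ...
//             b   NewMBB         ; unconditional branch over the hole
//   island:   .word <constants>  ; CPE placed in the hole
//   NewMBB:   ... remainder of the original block ...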
-void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, +void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB) { CPUser &U = CPUsers[CPUserIndex]; @@ -1245,7 +1262,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign, UserBBI.postKnownBits()); - if (OffsetIsInRange(UserOffset, CPEOffset, U)) { + if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = llvm::next(MachineFunction::iterator(UserMBB)); @@ -1264,7 +1281,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); BBInfo[UserMBB->getNumber()].Size += Delta; - AdjustBBOffsetsAfter(UserMBB); + adjustBBOffsetsAfter(UserMBB); return; } } @@ -1298,7 +1315,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // The 4 in the following is for the unconditional branch we'll be inserting // (allows for long branch on Thumb1). Alignment of the island is handled - // inside OffsetIsInRange. + // inside isOffsetInRange. BaseInsertOffset -= 4; DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset) @@ -1327,7 +1344,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, MI = llvm::next(MI)) { if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) { CPUser &U = CPUsers[CPUIndex]; - if (!OffsetIsInRange(Offset, EndInsertOffset, U)) { + if (!isOffsetInRange(Offset, EndInsertOffset, U)) { // Shift insertion point by one unit of alignment so it is within reach. BaseInsertOffset -= 1u << LogAlign; EndInsertOffset -= 1u << LogAlign; @@ -1352,29 +1369,29 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, // Avoid splitting an IT block. if (LastIT) { unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC != ARMCC::AL) MI = LastIT; } - NewMBB = SplitBlockBeforeInstr(MI); + NewMBB = splitBlockBeforeInstr(MI); } -/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// handleConstantPoolUser - Analyze the specified user, checking to see if it /// is out-of-range. If so, pick up the constant pool value and move it some /// place in-range. Return true if we changed any addresses (thus must run /// another pass of branch lengthening), false otherwise. -bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { +bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { CPUser &U = CPUsers[CPUserIndex]; MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); // Compute this only once, it's expensive. - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); // See if the current entry is within range, or there is a clone of it // in range.
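// Reminder of findInRangeCPEntry's contract, documented at its definition above: 0 = no usable entry, so a new island must be placed below; 1 = the user is already served in range with no code motion; 2 = an in-range clone was adopted and the old entry was deleted, so offsets shifted and the caller must schedule another iteration.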
- int result = LookForExistingCPEntry(U, UserOffset); + int result = findInRangeCPEntry(U, UserOffset); if (result==1) return false; else if (result==2) return true; @@ -1386,7 +1403,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock(); MachineBasicBlock *NewMBB; water_iterator IP; - if (LookForWater(U, UserOffset, IP)) { + if (findAvailableWater(U, UserOffset, IP)) { DEBUG(dbgs() << "Found water in range\n"); MachineBasicBlock *WaterBB = *IP; @@ -1403,9 +1420,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { } else { // No water found. DEBUG(dbgs() << "No water found\n"); - CreateNewWater(CPUserIndex, UserOffset, NewMBB); + createNewWater(CPUserIndex, UserOffset, NewMBB); - // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // splitBlockBeforeInstr adds to WaterList, which is important when it is // called while handling branches so that the water will be seen on the // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. @@ -1430,10 +1447,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { MF->insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. - UpdateForInsertedWaterBlock(NewIsland); + updateForInsertedWaterBlock(NewIsland); // Decrement the old entry, and remove it if refcount becomes 0. - DecrementOldEntry(CPI, CPEMI); + decrementCPEReferenceCount(CPI, CPEMI); // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. @@ -1448,7 +1465,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - AdjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); + adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland))); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1463,9 +1480,9 @@ bool ARMConstantIslands::HandleConstantPoolUser(unsigned CPUserIndex) { return true; } -/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update /// sizes and offsets of impacted basic blocks. -void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { +void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) { MachineBasicBlock *CPEBB = CPEMI->getParent(); unsigned Size = CPEMI->getOperand(2).getImm(); CPEMI->eraseFromParent(); @@ -1480,7 +1497,7 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // Entries are sorted by descending alignment, so realign from the front. CPEBB->setAlignment(getCPELogAlign(CPEBB->begin())); - AdjustBBOffsetsAfter(CPEBB); + adjustBBOffsetsAfter(CPEBB); // An island has only one predecessor BB and one successor BB. Check if // this BB's predecessor jumps directly to this BB's successor. This // shouldn't happen currently. @@ -1488,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { // FIXME: remove the empty blocks after all the work is done? } -/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// removeUnusedCPEntries - Remove constant pool entries whose refcounts /// are zero. 
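// The refcount lifecycle, roughly (pieced together from the functions in this file): each entry carries one reference per user, findInRangeCPEntry bumps a clone's count when it redirects a user to it, and decrementCPEReferenceCount releases the original; any count that reaches zero ends up in removeDeadCPEMI, either immediately or in the sweep below.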
-bool ARMConstantIslands::RemoveUnusedCPEntries() { +bool ARMConstantIslands::removeUnusedCPEntries() { unsigned MadeChange = false; for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { std::vector<CPEntry> &CPEs = CPEntries[i]; for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { - RemoveDeadCPEMI(CPEs[j].CPEMI); + removeDeadCPEMI(CPEs[j].CPEMI); CPEs[j].CPEMI = NULL; MadeChange = true; } @@ -1505,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() { return MadeChange; } -/// BBIsInRange - Returns true if the distance between specific MI and +/// isBBInRange - Returns true if the distance between specific MI and /// specific BB can fit in MI's displacement field. -bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, +bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) { unsigned PCAdj = isThumb ? 4 : 8; - unsigned BrOffset = GetOffsetOf(MI) + PCAdj; + unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() << " from BB#" << MI->getParent()->getNumber() << " max delta=" << MaxDisp - << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " from " << getOffsetOf(MI) << " to " << DestOffset << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { @@ -1530,37 +1547,37 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, return false; } -/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// fixupImmediateBr - Fix up an immediate branch whose destination is too far /// away to fit in its displacement field. -bool ARMConstantIslands::FixUpImmediateBr(ImmBranch &Br) { +bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); // Check to see if the DestBB is already in-range. - if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + if (isBBInRange(MI, DestBB, Br.MaxDisp)) return false; if (!Br.isCond) - return FixUpUnconditionalBr(Br); - return FixUpConditionalBr(Br); + return fixupUnconditionalBr(Br); + return fixupConditionalBr(Br); } -/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is /// too far away to fit in its displacement field. If the LR register has been /// spilled in the epilogue, then we can use BL to implement a far jump. /// Otherwise, add an intermediate branch instruction to a branch. bool -ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *MBB = MI->getParent(); if (!isThumb1) - llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!"); + llvm_unreachable("fixupUnconditionalBr is Thumb1 only!"); // Use BL to implement far jump. Br.MaxDisp = (1 << 21) * 2; MI->setDesc(TII->get(ARM::tBfar)); BBInfo[MBB->getNumber()].Size += 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); HasFarJump = true; ++NumUBrFixed; @@ -1569,11 +1586,11 @@ ARMConstantIslands::FixUpUnconditionalBr(ImmBranch &Br) { return true; } -/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// fixupConditionalBr - Fix up a conditional branch whose destination is too /// far away to fit in its displacement field. 
It is converted to an inverse /// conditional branch + an unconditional branch to the destination. bool -ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { +ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { MachineInstr *MI = Br.MI; MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); @@ -1607,7 +1624,7 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // bne L2 // b L1 MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { + if (isBBInRange(MI, NewDest, Br.MaxDisp)) { DEBUG(dbgs() << " Invert Bcc condition and swap its destination with " << *BMI); BMI->getOperand(0).setMBB(DestBB); @@ -1619,7 +1636,7 @@ } if (NeedSplit) { - SplitBlockBeforeInstr(MI); + splitBlockBeforeInstr(MI); // No need for the branch to the next block. We're adding an unconditional // branch to the destination. int delta = TII->GetInstSizeInBytes(&MBB->back()); @@ -1651,14 +1668,14 @@ ARMConstantIslands::FixUpConditionalBr(ImmBranch &Br) { // Remove the old conditional branch. It may or may not still be in MBB. BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); MI->eraseFromParent(); - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); return true; } -/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill +/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spill /// LR / restore LR to pc. FIXME: This is done here because it's only possible /// to do this if tBfar is not used. -bool ARMConstantIslands::UndoLRSpillRestore() { +bool ARMConstantIslands::undoLRSpillRestore() { bool MadeChange = false; for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { MachineInstr *MI = PushPopMIs[i]; @@ -1677,26 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() { return MadeChange; } -// mayOptimizeThumb2Instruction - Returns true if OptimizeThumb2Instructions +// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions // below may shrink MI. bool ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const { switch(MI->getOpcode()) { - // OptimizeThumb2Instructions. + // optimizeThumb2Instructions. case ARM::t2LEApcrel: case ARM::t2LDRpci: - // OptimizeThumb2Branches. + // optimizeThumb2Branches. case ARM::t2B: case ARM::t2Bcc: case ARM::tBcc: - // OptimizeThumb2JumpTables. + // optimizeThumb2JumpTables. case ARM::t2BR_JT: return true; } return false; } -bool ARMConstantIslands::OptimizeThumb2Instructions() { +bool ARMConstantIslands::optimizeThumb2Instructions() { bool MadeChange = false; // Shrink ADR and LDR from constantpool. @@ -1727,7 +1744,7 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { if (!NewOpc) continue; - unsigned UserOffset = GetUserOffset(U); + unsigned UserOffset = getUserOffset(U); unsigned MaxOffs = ((1 << Bits) - 1) * Scale; // Be conservative with inline asm. @@ -1735,22 +1752,23 @@ bool ARMConstantIslands::OptimizeThumb2Instructions() { MaxOffs -= 2; // FIXME: Check if offset is multiple of scale if scale is not 4.
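// Concrete instance of the MaxOffs formula above (Bits/Scale values assumed from the Thumb1 encodings, not shown in this hunk): shrinking t2LDRpci to tLDRpci uses Bits = 8 and Scale = 4, so MaxOffs = ((1 << 8) - 1) * 4 = 1020 bytes, reduced to 1018 when inline asm makes the alignment uncertain.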
- if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + DEBUG(dbgs() << "Shrink: " << *U.MI); U.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = U.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2CPShrunk; MadeChange = true; } } - MadeChange |= OptimizeThumb2Branches(); - MadeChange |= OptimizeThumb2JumpTables(); + MadeChange |= optimizeThumb2Branches(); + MadeChange |= optimizeThumb2JumpTables(); return MadeChange; } -bool ARMConstantIslands::OptimizeThumb2Branches() { +bool ARMConstantIslands::optimizeThumb2Branches() { bool MadeChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { @@ -1776,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { if (NewOpc) { unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); - if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + if (isBBInRange(Br.MI, DestBB, MaxOffs)) { + DEBUG(dbgs() << "Shrink branch: " << *Br.MI); Br.MI->setDesc(TII->get(NewOpc)); MachineBasicBlock *MBB = Br.MI->getParent(); BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumT2BrShrunk; MadeChange = true; } @@ -1797,7 +1816,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { NewOpc = 0; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg); if (Pred == ARMCC::EQ) NewOpc = ARM::tCBZ; else if (Pred == ARMCC::NE) @@ -1807,7 +1826,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); // Check if the distance is within 126. Subtract starting offset by 2 // because the cmp will be eliminated. - unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; + unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { MachineBasicBlock::iterator CmpMI = Br.MI; @@ -1815,11 +1834,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { --CmpMI; if (CmpMI->getOpcode() == ARM::tCMPi8) { unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); + Pred = getInstrPredicate(CmpMI, PredReg); if (Pred == ARMCC::AL && CmpMI->getOperand(1).getImm() == 0 && isARMLowRegister(Reg)) { MachineBasicBlock *MBB = Br.MI->getParent(); + DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI); MachineInstr *NewBR = BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); @@ -1827,7 +1847,7 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { Br.MI->eraseFromParent(); Br.MI = NewBR; BBInfo[MBB->getNumber()].Size -= 2; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumCBZ; MadeChange = true; } @@ -1839,9 +1859,9 @@ bool ARMConstantIslands::OptimizeThumb2Branches() { return MadeChange; } -/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. 
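// What tbb / tbh buy (encoding details assumed from the ARM ARM, not from this hunk): both load an entry from a table of byte (tbb) or halfword (tbh) offsets and branch to pc + 2*entry, so a 32-bit jump-table slot can shrink to one or two bytes, but only when every target lies forward of the table and within 2*255 or 2*65535 bytes; the ByteOk and HalfWordOk flags below track exactly that.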
-bool ARMConstantIslands::OptimizeThumb2JumpTables() { +bool ARMConstantIslands::optimizeThumb2JumpTables() { bool MadeChange = false; // FIXME: After the tables are shrunk, can we get rid some of the @@ -1861,7 +1881,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { bool ByteOk = true; bool HalfWordOk = true; - unsigned JTOffset = GetOffsetOf(MI) + 4; + unsigned JTOffset = getOffsetOf(MI) + 4; const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { MachineBasicBlock *MBB = JTBBs[j]; @@ -1936,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { if (!OptOk) continue; + DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI + << " lea: " << *LeaMI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); // FIXME: Insert an "ALIGN" instruction to ensure the next instruction // is 2-byte aligned. For now, asm printer will fix it up. unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); @@ -1954,7 +1977,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { int delta = OrigSize - NewSize; BBInfo[MBB->getNumber()].Size -= delta; - AdjustBBOffsetsAfter(MBB); + adjustBBOffsetsAfter(MBB); ++NumTBs; MadeChange = true; @@ -1964,9 +1987,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables() { return MadeChange; } -/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that /// jump tables always branch forwards, since that's what tbb and tbh need. -bool ARMConstantIslands::ReorderThumb2JumpTables() { +bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -1995,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { // The destination precedes the switch. Try to move the block forward // so we have a positive offset. MachineBasicBlock *NewBB = - AdjustJTTargetBlockForward(MBB, MI->getParent()); + adjustJTTargetBlockForward(MBB, MI->getParent()); if (NewBB) MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); MadeChange = true; @@ -2007,8 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables() { } MachineBasicBlock *ARMConstantIslands:: -AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) -{ +adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. 
This is a very simple diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h index 1c4e532..6a84f8a 100644 --- a/lib/Target/ARM/ARMELFWriterInfo.h +++ b/lib/Target/ARM/ARMELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class ARMELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c2b7816..5fc0360 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -19,7 +19,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -613,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineInstr &MI = *MBBI; unsigned Opcode = MI.getOpcode(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; @@ -794,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { - llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *TII); + emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, *TII, RI); + emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { - llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, - *TII); + emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, ARMCC::AL, 0, + *TII); } // If there's dynamic realignment, adjust for it. if (RI.needsStackRealignment(MF)) { diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a24eab4..2e1eaca 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" -#include "ARMRegisterInfo.h" #include "ARMTargetMachine.h" #include "ARMSubtarget.h" #include "ARMConstantPoolValue.h" @@ -2112,13 +2111,10 @@ bool ARMFastISel::SelectRet(const Instruction *I) { } unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - - // iOS needs the r9 versions of the opcodes. - bool isiOS = Subtarget->isTargetIOS(); if (isThumb2) { - return isiOS ? ARM::tBLr9 : ARM::tBL; + return ARM::tBL; } else { - return isiOS ? ARM::BLr9 : ARM::BL; + return ARM::BL; } } @@ -2177,8 +2173,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. 
MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); if (isThumb2) @@ -2303,8 +2298,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLr9 for iOS, BL otherwise. - // TODO: Turn this into the table of arm call ops. + // Issue the call. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. @@ -2350,7 +2344,8 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { return Len <= 16; } -bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) { +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, + uint64_t Len) { // Make sure we don't bloat code by inlining very large memcpy's. if (!ARMIsMemCpySmall(Len)) return false; @@ -2639,7 +2634,7 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index bd4b2a9..402ecb0 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -422,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) { // Tail call return: adjust the stack pointer and jump to callee. MBBI = MBB.getLastNonDebugInstr(); MachineOperand &JumpTarget = MBBI->getOperand(0); // Jump to label or value in register. - if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { - unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); + if (RetOpcode == ARM::TCRETURNdi) { + unsigned TCOpcode = STI.isThumb() ? + (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) : + ARM::TAILJMPd; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -449,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); - } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, - TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). 
- addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); @@ -648,9 +643,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || - RetOpcode == ARM::TCRETURNdiND || - RetOpcode == ARM::TCRETURNri || - RetOpcode == ARM::TCRETURNriND); + RetOpcode == ARM::TCRETURNri); SmallVector<unsigned, 4> Regs; unsigned i = CSI.size(); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ffb9acb..1eafbbc 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -2825,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VZIPd32; break; + // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VZIPq8; break; case MVT::v8i16: Opc = ARM::VZIPq16; break; case MVT::v4f32: @@ -2844,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VUZPd32; break; + // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. + case MVT::v2i32: Opc = ARM::VTRNd32; break; case MVT::v16i8: Opc = ARM::VUZPq8; break; case MVT::v8i16: Opc = ARM::VUZPq16; break; case MVT::v4f32: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e26dd22..a103c94 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -19,7 +19,6 @@ #include "ARMConstantPoolValue.h" #include "ARMMachineFunctionInfo.h" #include "ARMPerfectShuffle.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -508,7 +507,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - + setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); @@ -770,8 +769,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + } // Various VFP goodness if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) { @@ -1642,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// and then confiscate the rest of the parameter registers to ensure /// this.
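// A hypothetical AAPCS case for the function below: a 16-byte byval argument whose first register is r2 arrives half in r2-r3 and half on the stack; after the first AllocateReg call shown here, the remaining GPRs are claimed as well (the loop doing so is elided from this hunk), so no unrelated argument can land in the middle of the split aggregate.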
void -llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { +ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { unsigned reg = State->AllocateReg(GPRArgRegs, 4); assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && @@ -1672,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, - const ARMInstrInfo *TII) { + const TargetInstrInfo *TII) { unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { @@ -1807,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMInstrInfo *TII = - ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -1936,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return result; } -bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; - unsigned NumCopies = 0; - SDNode* Copies[2] = { 0, 0 }; - SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::CopyToReg) { - Copies[NumCopies++] = Use; - } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + SDValue TCChain = Chain; + SDNode *Copy = *N->use_begin(); + if (Copy->getOpcode() == ISD::CopyToReg) { + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; + TCChain = Copy->getOperand(0); + } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { + SDNode *VMov = Copy; // f64 returned in a pair of GPRs. - for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + SmallPtrSet<SDNode*, 2> Copies; + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::CopyToReg) return false; - Copies[UI.getUse().getResNo()] = *UI; - ++NumCopies; + Copies.insert(*UI); } - } else if (Use->getOpcode() == ISD::BITCAST) { + if (Copies.size() > 2) + return false; + + for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); + UI != UE; ++UI) { + SDValue UseChain = UI->getOperand(0); + if (Copies.count(UseChain.getNode())) + // Second CopyToReg + Copy = *UI; + else + // First CopyToReg + TCChain = UseChain; + } + } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. 
- if (!Use->hasNUsesOfValue(1, 0)) + if (!Copy->hasOneUse()) return false; - Use = *Use->use_begin(); - if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) + Copy = *Copy->use_begin(); + if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Copies[NumCopies++] = Use; + Chain = Copy->getOperand(0); } else { return false; } - if (NumCopies != 1 && NumCopies != 2) - return false; - bool HasRet = false; - for (unsigned i = 0; i < NumCopies; ++i) { - SDNode *Copy = Copies[i]; - for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); - UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - SDNode *Use = *UI; - if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) - continue; - return false; - } - if (UI->getOpcode() != ARMISD::RET_FLAG) - return false; - HasRet = true; - } + for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ARMISD::RET_FLAG) + return false; + HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!EnableARMTailCalls) + if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) return false; if (!CI->isTailCall()) @@ -3674,27 +3683,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) - return SDValue(); - - ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); - - APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); - if (ImmVal == -1) - return SDValue(); - - DebugLoc DL = Op.getDebugLoc(); - SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); - SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, - DAG.getConstant(0, MVT::i32)); -} - /// isNEONModifiedImm - Check if the specified splat value corresponds to a /// valid vector constant for a NEON instruction with a "modified immediate" /// operand (e.g., VMOV). If so, return the encoded value. @@ -3831,6 +3819,58 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, return DAG.getTargetConstant(EncodedVal, MVT::i32); } +SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) const { + if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + return SDValue(); + + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + assert(Op.getValueType() == MVT::f32 && + "ConstantFP custom lowering should only occur for f32."); + + // Try splatting with a VMOV.f32... 
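// What a VMOV.f32 splat can encode (range quoted from the ARM ARM's VFP modified-immediate rule, an assumption here rather than something in this hunk): an 8-bit immediate covering values of the form +/-n * 2^-r with 16 <= n <= 31 and 0 <= r <= 7, so 1.0, 0.5 and 31.0 fit while 0.1 does not; getFP32Imm returns -1 for everything else, which is what sends lowering to the VMOV.i32 / VMVN.i32 fallbacks further down.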
+ APFloat FPVal = CFP->getValueAPF(); + int ImmVal = ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { + DebugLoc DL = Op.getDebugLoc(); + SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, + NewVal); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, + DAG.getConstant(0, MVT::i32)); + } + + // If that fails, try a VMOV.i32 + EVT VMovVT; + unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); + SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, + VMOVModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, + NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + // Finally, try a VMVN.i32 + NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, + VMVNModImm); + if (NewVal != SDValue()) { + DebugLoc DL = Op.getDebugLoc(); + SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, + VecConstant); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, + DAG.getConstant(0, MVT::i32)); + } + + return SDValue(); +} + + static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); @@ -5795,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; unsigned MaxCSNum = 0; MachineModuleInfo &MMI = MF->getMMI(); - for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) { + for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; + ++BB) { if (!BB->isLandingPad()) continue; // FIXME: We should assert that the EH_LABEL is the first MI in the landing @@ -5871,7 +5912,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); else if (!Subtarget->hasVFP2()) BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); - else + else BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); @@ -7308,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { /// ISD::STORE. static SDValue PerformSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. 
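// Worked example of the rewrite below: a truncating store of v4i16 as v4i8 has FromEltSz = 16, ToEltSz = 8, hence SizeRatio = 2. The source is bitcast to v8i8, shuffled with mask <0,2,4,6,-1,-1,-1,-1> so the four live bytes land at the bottom of the register, and the result is written with a single legal i32 store instead of four separate byte stores.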
SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile()) + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + DebugLoc DL = St->getDebugLoc(); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. + + // Find the largest store unit + MVT StoreType = MVT::i8; + for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; + tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { + MVT Tp = (MVT::SimpleValueType)tp; + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. + unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], + Chains.size()); + } + + if (!ISD::isNormalStore(St)) return SDValue(); + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. 
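// Sketch of the split (register names illustrative only): rather than
//   vmov d0, r0, r1 ; vstr d0, [rB]
// the two GPR halves are stored directly,
//   str r0, [rB] ; str r1, [rB, #4]
// so the value never takes the NEON store path at all.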
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse() && !St->isVolatile()) { + StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; DebugLoc DL = St->getDebugLoc(); SDValue BasePtr = St->getBasePtr(); @@ -7337,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N, StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; DebugLoc dl = StVal.getDebugLoc(); SDValue IntVec = StVal.getOperand(0); @@ -8259,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); - DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne); + DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -8586,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// a register against the immediate without having to materialize the /// immediate into a register. bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // Thumb2 and ARM modes can use cmn for negative immediates. if (!Subtarget->isThumb()) - return ARM_AM::getSOImmVal(Imm) != -1; + return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; if (Subtarget->isThumb2()) - return ARM_AM::getT2SOImmVal(Imm) != -1; + return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; + // Thumb1 doesn't have cmn, and only 8-bit immediates. return Imm >= 0 && Imm <= 255; } @@ -8776,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, } void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. 
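The merge performed just below intersects the known bits of the two inputs: a result bit is known only if both sides agree on it. A self-contained sketch of that rule (illustrative struct, not the APInt-based LLVM interface):

    #include <cstdint>

    struct KnownBits32 {
      uint32_t Zero; // bits known to be 0
      uint32_t One;  // bits known to be 1
    };

    // For a select/CMOV, keep only what is known identically on both inputs.
    KnownBits32 mergeSelect(KnownBits32 L, KnownBits32 R) {
      return { L.Zero & R.Zero, L.One & R.One };
    }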
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, - KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index a71b74e..352d980 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -315,7 +315,6 @@ namespace llvm { SelectionDAG &DAG) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -494,7 +493,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1d38bcf..f04926a 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -532,6 +532,7 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern> let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; + let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 72af535..5d3e059 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -18,7 +18,6 @@ #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMSubtarget.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8196582..1eb561d 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -181,12 +181,8 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4">; -def NoVFP4 : Predicate<"!Subtarget->hasVFP4()">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON">; -def HasNEON2 : Predicate<"Subtarget->hasNEON2()">, - AssemblerPredicate<"FeatureNEON2">; -def NoNEON2 : Predicate<"!Subtarget->hasNEON2()">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16">; def HasDivide : Predicate<"Subtarget->hasDivide()">, @@ -221,6 +217,14 @@ def UseMovt : Predicate<"Subtarget->useMovt()">; def DontUseMovt : Predicate<"!Subtarget->useMovt()">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; +// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available. +// But only select them if more precision in FP computation is allowed. +// Do not use them for Darwin platforms. +def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " + "!Subtarget->isTargetDarwin()">; +def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " + "Subtarget->isTargetDarwin()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. 
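Both the cmn-based compare legality above and the so_imm_neg predicate in the next hunk depend on the ARM modified-immediate encoding: an 8-bit value rotated right by an even amount. A rough standalone model of that test (a sketch, not the real ARM_AM::getSOImmVal):

    #include <cstdint>

    bool isARMSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot != 32; Rot += 2) {
        // Rotating V left by Rot undoes a rotate-right-by-Rot encoding.
        uint32_t R = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
        if (R <= 0xFFu)
          return true;
      }
      return false;
    }

    // A cmp against a negative immediate can then be emitted as cmn when the
    // negation is encodable: isARMSOImm(V) || isARMSOImm(0u - V).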
@@ -251,7 +255,8 @@ def imm16_31 : ImmLeaf<i32, [{ def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; }], so_imm_neg_XFORM> { let ParserMatchClass = so_imm_neg_asmoperand; } @@ -736,7 +741,7 @@ def postidx_reg : Operand<i32> { let DecoderMethod = "DecodePostIdxReg"; let PrintMethod = "printPostIdxRegOperand"; let ParserMatchClass = PostIdxRegAsmOperand; - let MIOperandInfo = (ops GPR, i32imm); + let MIOperandInfo = (ops GPRnopc, i32imm); } @@ -903,6 +908,11 @@ def p_imm : Operand<i32> { let DecoderMethod = "DecodeCoprocessor"; } +def pf_imm : Operand<i32> { + let PrintMethod = "printPImmediate"; + let ParserMatchClass = CoprocNumAsmOperand; +} + def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1182,6 +1192,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{19-16} = Rn; let Inst{15-12} = 0b0000; let Inst{11-0} = imm; + + let Unpredictable{15-12} = 0b1111; } def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rn, $Rm", @@ -1195,6 +1207,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{15-12} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{3-0} = Rm; + + let Unpredictable{15-12} = 0b1111; } def rsi : AI1<opcod, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, @@ -1209,11 +1223,13 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{11-5} = shift{11-5}; let Inst{4} = 0; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } def rsr : AI1<opcod, (outs), - (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rn, $shift", - [(opnode GPR:$Rn, so_reg_reg:$shift)]> { + [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -1225,6 +1241,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, let Inst{6-5} = shift{6-5}; let Inst{4} = 1; let Inst{3-0} = shift{3-0}; + + let Unpredictable{15-12} = 0b1111; } } @@ -1330,10 +1348,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{4} = 0; let Inst{3-0} = shift{3-0}; } - def rsr : AsI1<opcod, (outs GPR:$Rd), - (ins GPR:$Rn, so_reg_reg:$shift), + def rsr : AsI1<opcod, (outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>, + [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>, Requires<[IsARM]> { bits<4> Rd; bits<4> Rn; @@ -1367,7 +1385,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, cc_out:$s)>, Requires<[IsARM]>; def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn, + (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, Requires<[IsARM]>; @@ -1907,7 +1925,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1917,7 +1935,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl", 
"\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]> { + Requires<[IsARM]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -1927,7 +1945,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -1936,7 +1954,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T, IsNotIOS]> { + Requires<[IsARM, HasV5T]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -1946,67 +1964,19 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsNotIOS]>; + Requires<[IsARM, HasV4T]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsNotIOS]>; + Requires<[IsARM, NoV4T]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func, variable_ops), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), - 4, IIC_Br, - [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>, - Requires<[IsARM, IsIOS]>; - - def BLr9_pred : ARMPseudoExpand<(outs), - (ins bl_target:$func, pred:$p, variable_ops), - 4, IIC_Br, - [(ARMcall_pred tglobaladdr:$func)], - (BL_pred bl_target:$func, pred:$p)>, - Requires<[IsARM, IsIOS]>; - - // ARMv5T and above - def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall GPR:$func)], - (BLX GPR:$func)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops), - 4, IIC_Br, - [(ARMcall_pred GPR:$func)], - (BLX_pred GPR:$func, pred:$p)>, - Requires<[IsARM, HasV5T, IsIOS]>; - - // ARMv4T - // Note: Restrict $func to the tGPR regclass to prevent it being in LR. - def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T, IsIOS]>; - - // ARMv4 - def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T, IsIOS]>; - - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. - def BMOVPCBr9_CALL : ARMPseudoInst<(outs),(ins bl_target:$func, variable_ops), - 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM, IsIOS]>; + Requires<[IsARM]>; } let isBranch = 1, isTerminator = 1 in { @@ -2073,45 +2043,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { - // IOS versions. 
- let Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsIOS]>;
-
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsIOS]>;
-
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsIOS]>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>;
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsIOS]>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>;
- }
-
- // Non-IOS versions (the difference is R9).
- let Uses = [SP] in {
- def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotIOS]>;
-
- def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotIOS]>;
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>;
- def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsNotIOS]>;
-
- def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsNotIOS]>;
- }
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM]>;
}
// Secure Monitor Call is a system instruction.
@@ -2484,7 +2431,7 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{3-0} = offset{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackImm";
}
- def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
(ins addr_offset_none:$addr, postidx_reg:$Rm),
IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru,
opc, "\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
@@ -2492,8 +2439,10 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{23} = Rm{4};
let Inst{22} = 0;
let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
let Inst{3-0} = Rm{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ let DecoderMethod = "DecodeLDR";
}
}
@@ -3241,6 +3190,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
}
// Saturating add/subtract
@@ -3533,19 +3484,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// property. Remove them when it's possible to add those properties
// on an individual MachineInstr, not just an instruction description.
let isCommutable = 1 in { -def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), +def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, Requires<[IsARM, HasV6]> { let Inst{15-12} = 0b0000; + let Unpredictable{15-12} = 0b1111; } let Constraints = "@earlyclobber $Rd" in -def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, +def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL32, - [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))], - (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, + [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], + (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, Requires<[IsARM, NoV6]>; } @@ -4040,10 +3992,13 @@ def BCCZi64 : PseudoInst<(outs), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd">; + def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, so_reg_imm:$shift, pred:$p), 4, IIC_iCMOVsr, @@ -4164,7 +4119,7 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } -// Pseudo isntruction that combines movs + predicated rsbmi +// Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS let usesCustomInserter = 1, Defs = [CPSR] in { def ABS : ARMPseudoInst< @@ -4325,9 +4280,9 @@ def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, // SWP/SWPB are deprecated in V6/V7. 
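The ABS pseudo above expands to movs plus a predicated rsbmi: copy the value while setting flags, then reverse-subtract from zero only when the copy was negative. The same dataflow in scalar form (illustrative; unsigned arithmetic is used so the INT32_MIN case wraps as the hardware does):

    #include <cstdint>

    int32_t absViaRsbmi(int32_t x) {
      int32_t r = x;                     // movs r, x   (sets the N flag)
      if (r < 0)                         // "mi" condition: N flag set
        r = (int32_t)(0u - (uint32_t)r); // rsbmi r, r, #0
      return r;
    }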
let mayLoad = 1, mayStore = 1 in { -def SWP : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWP : AIswp<0, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>; -def SWPB: AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, addr_offset_none:$addr), +def SWPB: AIswp<1, (outs GPRnopc:$Rt), (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>; } @@ -4356,7 +4311,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, @@ -4635,7 +4590,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, class MovRRCopro<string opc, bit direction, list<dag> pattern = []> : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> { let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -4654,13 +4609,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []> } def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary, + GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; @@ -4677,10 +4632,12 @@ class MovRRCopro2<string opc, bit direction, list<dag> pattern = []> let Inst{11-8} = cop; let Inst{7-4} = opc1; let Inst{3-0} = CRm; + + let DecoderMethod = "DecodeMRRC2"; } def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */, - [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2, + [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, imm:$CRm)]>; def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; @@ -4689,22 +4646,32 @@ def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; // // Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +def MRS : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", []> { bits<4> Rd; let Inst{23-16} = 0b00001111; + let Unpredictable{19-17} = 0b111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } -def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPR:$Rd, pred:$p)>, Requires<[IsARM]>; +def : InstAlias<"mrs${p} $Rd, cpsr", (MRS GPRnopc:$Rd, pred:$p)>, Requires<[IsARM]>; -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, +// The MRSsys instruction is the MRS instruction from the ARM ARM, +// section B9.3.9, with the R bit set to 1. 
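The Unpredictable masks added to MRS here and to MRSsys just below mark should-be-constant encoding bits whose violation is architecturally UNPREDICTABLE rather than UNDEFINED, so a decoder can accept the word while flagging it. A hedged sketch of that classification (illustrative types, not the LLVM MC decoder interface):

    #include <cstdint>

    enum DecodeStatus { Success, SoftFail };

    DecodeStatus checkUnpredictable(uint32_t Insn, uint32_t UnpredMask,
                                    uint32_t Expected) {
      // A mismatch in the masked bits still decodes, but is flagged.
      return ((Insn ^ Expected) & UnpredMask) ? SoftFail : Success;
    }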
+def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", []> { bits<4> Rd; let Inst{23-16} = 0b01001111; + let Unpredictable{19-16} = 0b1111; + let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; + + let Inst{11-0} = 0b000000000000; + let Unpredictable{11-0} = 0b110100001111; } // Move from ARM core register to Special Register @@ -4868,36 +4835,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? -// Tail calls -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsIOS]>; - -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotIOS]>; +// Tail calls. These patterns also apply to Thumb mode. +def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>; +def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; +def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; // Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; -def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsARM, IsNotIOS]>; +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; def : ARMPat<(ARMcall_nolink texternalsym:$func), - (BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsARM, IsIOS]>; + (BMOVPCB_CALL texternalsym:$func)>; // zextload i1 -> zextload i8 def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f61eb2b..fd8ac0b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -530,16 +530,16 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. def VLDMQIA - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), + : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; + [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), + : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn), IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; + [(store (v2f64 DPair:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. 
@@ -1938,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin> // VST1LN : Vector Store (single element from one lane) class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), + (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { - let Rm = 0b1111; - let DecoderMethod = "DecodeVST1LN"; -} -class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> - : NLdStLn<1, 0b00, op11_8, op7_4, (outs), - (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } @@ -1962,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt, + addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -1987,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), // ...with address register writeback: class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty, - PatFrag StoreOp, SDNode ExtractOp> + PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, + (ins AdrMode:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - addrmode6:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> @@ -2004,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, - NEONvgetlaneu> { + NEONvgetlaneu, addrmode6> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, - extractelt> { + extractelt, addrmode6oneL32> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } @@ -3642,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, } multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode> { + string baseOpc, SDNode OpNode> { // 64-bit vector types. 
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { @@ -3676,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx + + // Aliases for two-operand forms (source and dest regs the same). + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i8")) + DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i16")) + DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i32")) + DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v1i64")) + DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v16i8")) + QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v8i16")) + QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v4i32")) + QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; + def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"), + (!cast<Instruction>(!strconcat(baseOpc, "v2i64")) + QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; } // Neon Shift-Accumulate vector operations, @@ -3986,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4044,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -4096,23 +4115,36 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; - // Fused Vector Multiply-Accumulate and Fused 
Multiply-Subtract Operations. def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON2,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(v2f32 (fma DPR:$src1, DPR:$Vn, DPR:$Vm)), + (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma QPR:$src1, QPR:$Vn, QPR:$Vm)), + (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v2f32 (fma (fneg DPR:$src1), DPR:$Vn, DPR:$Vm)), + (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>, + Requires<[HasVFP4]>; +def : Pat<(v4f32 (fma (fneg QPR:$src1), QPR:$Vn, QPR:$Vm)), + (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, + Requires<[HasVFP4]>; // Vector Subtract Operations. @@ -4614,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; -defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs", + NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu", + NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -4649,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; -defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs", + NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu", + NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -4795,12 +4831,12 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, - (outs DPR:$Vd, DPR:$Vd1), (ins DPR:$Vm, DPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, - (outs QPR:$Vd, QPR:$Vd1), (ins QPR:$Vm, QPR:$Vm1), - NoItinerary, "vswp", "$Vd, $Vd1", "$Vm = $Vd, $Vm1 = $Vd1", + (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2), + NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm", []>; // Vector Move Operations. 
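The @llvm.fma.* patterns above select VFMA/VFMS, which differ from VMLA/VMLS in rounding once instead of twice. In portable C++ the same distinction is std::fmaf versus a separate multiply and add (sketch):

    #include <cmath>

    float unfusedMac(float acc, float a, float b) {
      return acc + a * b;           // vmla-style: two roundings
    }
    float fusedMac(float acc, float a, float b) {
      return std::fmaf(a, b, acc);  // vfma-style: one rounding
    }
    float fusedMsc(float acc, float a, float b) {
      return std::fmaf(-a, b, acc); // vfms-style: acc - a*b, one rounding
    }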
@@ -5342,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; +// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; @@ -5352,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; +// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. +def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; @@ -5462,13 +5502,13 @@ def : N3VSPat<fadd, VADDfd>; def : N3VSPat<fsub, VSUBfd>; def : N3VSPat<fmul, VMULfd>; def : N3VSMulOpPat<fmul, fadd, VMLAfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VMLSfd>, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, NoNEON2]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; def : N3VSMulOpPat<fmul, fadd, VFMAfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat<fmul, fsub, VFMSfd>, - Requires<[HasNEON2, UseNEONForFP,FPContractions]>; + Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N2VSPat<fabs, VABSfd>; def : N2VSPat<fneg, VNEGfd>; def : N3VSPat<NEONfmax, VMAXfd>; @@ -5594,6 +5634,7 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. 
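Scalar picture of the double-lengthening patterns that follow: each element is widened twice, mirroring two VMOVL steps after a 32-bit VLDRS load. Zero-extending flavor shown; the function is illustrative only:

    #include <cstdint>

    void lengthenV4I8ToV4I32(const uint8_t src[4], uint32_t dst[4]) {
      for (int i = 0; i != 4; ++i) {
        uint16_t mid = src[i]; // first step:  vmovl.u8  (i8  -> i16)
        dst[i] = mid;          // second step: vmovl.u16 (i16 -> i32)
      }
    }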
+//
// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
// Pat<(v4i32 (extloadvi8 addrmode5:$addr))
// (EXTRACT_SUBREG (VMOVLuv4i32
// qsub_0)>;
multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
- string Insn2Ty, SubRegIndex RegType> {
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0))>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial length, but which ends up only
+// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
+//
+// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
+// Pat<(v2i32 (extloadvi8 addrmode5:$addr))
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
- RegType)>;
+ dsub_0)>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
- RegType)>;
+ dsub_0)>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
(INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
ssub_0)), dsub_0)),
- RegType)>;
+ dsub_0)>;
}
defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
@@ -5636,12 +5712,12 @@ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
-// Double lengthening
- v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>; +// Double lengthening - v4i8 -> v4i16 -> v4i32 +defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; // v2i8 -> v2i16 -> v2i32 -defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>; +defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; // v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>; +defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)), @@ -5911,7 +5987,7 @@ def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; -// VSHL (immediate) two-operand aliases. +// VSHR (immediate) two-operand aliases. def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", @@ -5948,6 +6024,41 @@ def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; +// VRSHL two-operand aliases. +def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vrshl${p}.s8 $Vdn, $Vm", + (VRSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s16 $Vdn, $Vm", + (VRSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s32 $Vdn, $Vm", + (VRSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.s64 $Vdn, $Vm", + (VRSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u8 $Vdn, $Vm", + (VRSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u16 $Vdn, $Vm", + (VRSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u32 $Vdn, $Vm", + (VRSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vrshl${p}.u64 $Vdn, $Vm", + (VRSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", @@ -6911,6 +7022,100 @@ def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm", def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm", (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>; +// Two-operand variants for VHSUB. + // Signed. 
+def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.s8 $Vdn, $Vm", + (VHSUBsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s16 $Vdn, $Vm", + (VHSUBsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.s32 $Vdn, $Vm", + (VHSUBsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhsub${p}.u8 $Vdn, $Vm", + (VHSUBuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u16 $Vdn, $Vm", + (VHSUBuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhsub${p}.u32 $Vdn, $Vm", + (VHSUBuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + +// Two-operand variants for VHADD. + // Signed. +def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.s8 $Vdn, $Vm", + (VHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s16 $Vdn, $Vm", + (VHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.s32 $Vdn, $Vm", + (VHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + + // Unsigned. +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vhadd${p}.u8 $Vdn, $Vm", + (VHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u16 $Vdn, $Vm", + (VHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vhadd${p}.u32 $Vdn, $Vm", + (VHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// Two-operand variants for VRHADD. + // Signed. +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.s8 $Vdn, $Rm", + (VRHADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s16 $Vdn, $Rm", + (VRHADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.s32 $Vdn, $Rm", + (VRHADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + + // Unsigned. 
+def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Rm, pred:$p)>; + +def : NEONInstAlias<"vrhadd${p}.u8 $Vdn, $Rm", + (VRHADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u16 $Vdn, $Rm", + (VRHADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; +def : NEONInstAlias<"vrhadd${p}.u32 $Vdn, $Rm", + (VRHADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Rm, pred:$p)>; + // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ba1791b..6335229 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> { let ParserMatchClass = t_imm0_508s4_asmoperand; let OperandType = "OPERAND_IMMEDIATE"; } +// Alias use only, so no printer is necessary. +def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; } +def t_imm0_508s4_neg : Operand<i32> { + let ParserMatchClass = t_imm0_508s4_neg_asmoperand; + let OperandType = "OPERAND_IMMEDIATE"; +} // Define Thumb specific addressing modes. @@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), let DecoderMethod = "DecodeThumbAddSPImm"; } +def : tInstAlias<"add${p} sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; +def : tInstAlias<"add${p} sp, sp, $imm", + (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>; + // Can optionally specify SP as a three operand instruction. def : tInstAlias<"add${p} sp, sp, $imm", (tADDspi SP, t_imm0_508s4:$imm, pred:$p)>; @@ -405,14 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // prevent stack-pointer assignments that appear immediately before calls from // potentially appearing dead. let isCall = 1, - // On non-IOS platforms R9 is callee-saved. Defs = [LR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, (outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br, "bl${p}\t$func", [(ARMtcall tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]> { + Requires<[IsThumb]> { bits<22> func; let Inst{26} = func{21}; let Inst{25-16} = func{20-11}; @@ -426,7 +436,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]> { + Requires<[IsThumb, HasV5T]> { bits<21> func; let Inst{25-16} = func{20-11}; let Inst{13} = 1; @@ -439,7 +449,7 @@ let isCall = 1, def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br, "blx${p}\t$func", [(ARMtcall GPR:$func)]>, - Requires<[IsThumb, HasV5T, IsNotIOS]>, + Requires<[IsThumb, HasV5T]>, T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24; bits<4> func; let Inst{6-3} = func; @@ -450,37 +460,7 @@ let isCall = 1, def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), 4, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. 
- Defs = [LR], Uses = [R7, SP] in { - // Also used for Thumb2 - def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), - 4, IIC_Br, [(ARMtcall tglobaladdr:$func)], - (tBL pred:$p, t_bltarget:$func)>, - Requires<[IsThumb, IsIOS]>; - - // ARMv5T and above, also used for Thumb2 - def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), - 4, IIC_Br, [(ARMcall tglobaladdr:$func)], - (tBLXi pred:$p, t_blxtarget:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // Also used for Thumb2 - def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops), - 2, IIC_Br, [(ARMtcall GPR:$func)], - (tBLXr pred:$p, GPR:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; - - // ARMv4T - def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops), - 4, IIC_Br, - [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsThumb, IsThumb1Only, IsIOS]>; + Requires<[IsThumb, IsThumb1Only]>; } let isBranch = 1, isTerminator = 1, isBarrier = 1 in { @@ -524,24 +504,20 @@ let isBranch = 1, isTerminator = 1 in let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. let Uses = [SP] in { - // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls - // on IOS), so it's in ARMInstrThumb2.td. def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), 4, IIC_Br, [], (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } - // Non-IOS versions (the difference is R9). + // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls + // on IOS), so it's in ARMInstrThumb2.td. + // Non-IOS version: let Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), 4, IIC_Br, [], (tB t_brtarget:$dst, pred:$p)>, Requires<[IsThumb, IsNotIOS]>; - def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops), - 4, IIC_Br, [], - (tBX GPR:$dst, (ops 14, zero_reg))>, - Requires<[IsThumb, IsNotIOS]>; } } @@ -1307,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Direct calls def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsNotIOS]>; -def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>, - Requires<[IsThumb, HasV5T, IsIOS]>; + Requires<[IsThumb, HasV5T]>; // zextload i1 -> zextload i8 def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), @@ -1437,3 +1407,11 @@ def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; + +// Implied destination operand forms for shifts. 
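The aliases just below let the two-operand spelling stand for the three-operand encoding, with the destination register doubling as the shifted source. Numerically (sketch; imm is assumed to be within the operand's legal range):

    #include <cstdint>

    uint32_t lslImplied(uint32_t rdm, unsigned imm) {
      return rdm << imm; // lsls rdm, rdm, #imm  (imm in [0,31])
    }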
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm", + (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>; +def : tInstAlias<"lsr${s}${p} $Rdm, $imm", + (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; +def : tInstAlias<"asr${s}${p} $Rdm, $imm", + (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1f7edc1..e6fb9d5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -89,20 +89,26 @@ def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; } def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; + int64_t Value = -(int)N->getZExtValue(); + return Value && ARM_AM::getT2SOImmVal(Value) != -1; }], t2_so_imm_neg_XFORM> { let ParserMatchClass = t2_so_imm_neg_asmoperand; } /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : Operand<i32>, - ImmLeaf<i32, [{ +def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; } +def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 4096; -}]>; +}]> { + let ParserMatchClass = imm0_4095_asmoperand; +} -def imm0_4095_neg : PatLeaf<(i32 imm), [{ +def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; } +def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +}], imm_neg_XFORM> { + let ParserMatchClass = imm0_4095_neg_asmoperand; +} def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; @@ -2871,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { + +let isCommutable = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3189,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{13} = target{17}; let Inst{21-16} = target{16-11}; let Inst{10-0} = target{10-0}; + let DecoderMethod = "DecodeT2BInstruction"; } let isNotDuplicable = 1, isIndirectBranch = 1 in { @@ -3268,37 +3277,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb2, IsIOS]>; } -let isCall = 1, - // On non-IOS platforms R9 is callee-saved. - Defs = [LR], Uses = [SP] in { +let isCall = 1, Defs = [LR], Uses = [SP] in { // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def t2BMOVPCB_CALL : tPseudoInst<(outs), (ins t_bltarget:$func, variable_ops), 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsNotIOS]>; -} - -let isCall = 1, - // On IOS R9 is call-clobbered. - // R7 is marked as a use to prevent frame-pointer assignments from being - // moved above / below calls. - Defs = [LR], Uses = [R7, SP] in { - // mov lr, pc; b if callee is marked noreturn to avoid confusing the - // return stack predictor. 
- def t2BMOVPCBr9_CALL : tPseudoInst<(outs), - (ins t_bltarget:$func, variable_ops), - 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; } // Direct calls def : T2Pat<(ARMcall_nolink texternalsym:$func), (t2BMOVPCB_CALL texternalsym:$func)>, - Requires<[IsThumb, IsNotIOS]>; -def : T2Pat<(ARMcall_nolink texternalsym:$func), - (t2BMOVPCBr9_CALL texternalsym:$func)>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; // IT block let Defs = [ITSTATE] in @@ -3966,6 +3957,19 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// add w/ negative immediates is just a sub. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + + // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; @@ -3981,13 +3985,14 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $imm", (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; - // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e9d5720..3600b88 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -950,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -958,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -966,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -977,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -985,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -993,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1004,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1012,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. 
let D = VFPNeonA8Domain; @@ -1020,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1031,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]> { + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. let D = VFPNeonA8Domain; @@ -1046,10 +1046,10 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,NoVFP4]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; //===----------------------------------------------------------------------===// // Fused FP Multiply-Accumulate Operations. @@ -1060,7 +1060,7 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0, [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMAS : ASbIn<0b11101, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1068,17 +1068,25 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +def : Pat<(f64 (fma DPR:$Ddin, DPR:$Dn, DPR:$Dm)), + (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, SPR:$Sn, SPR:$Sm)), + (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFMSD : ADbI<0b11101, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1086,7 +1094,7 @@ def VFMSD : ADbI<0b11101, 0b10, 1, 0, [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFMSS : ASbIn<0b11101, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1094,17 +1102,33 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fma (fneg x), y, z) -> (vfms x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm)), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm)), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fneg (fma x, (fneg y), z) -> (vfms x, y, z) +def : Pat<(fneg (f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm))), + (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm))), + (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1112,7 +1136,7 @@ def VFNMAD : ADbI<0b11101, 0b01, 1, 0, [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMAS : ASbI<0b11101, 0b01, 1, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), @@ -1120,17 +1144,33 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. 
} def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma x, y, z)) -> (vfnma x, y, z) +def : Pat<(fneg (fma (f64 DPR:$Ddin), (f64 DPR:$Dn), (f64 DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f32 SPR:$Sdin), (f32 SPR:$Sn), (f32 SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma (fneg x), y, (fneg z)) -> (vfnma x, y, z) +def : Pat<(f64 (fma (fneg DPR:$Ddin), DPR:$Dn, (fneg DPR:$Dm))), + (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma (fneg SPR:$Sdin), SPR:$Sn, (fneg SPR:$Sm))), + (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1138,24 +1178,40 @@ def VFNMSD : ADbI<0b11101, 0b01, 0, 0, [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def VFNMSS : ASbI<0b11101, 0b01, 0, 0, (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]> { + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines. } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP4,FPContractions]>; + Requires<[HasVFP4,UseFusedMAC]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP4,DontUseNEONForFP,FPContractions]>; + Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>; + +// Match @llvm.fma.* intrinsics +// (fneg (fma (fneg x), y, z)) -> (vnfms x, y, z) +def : Pat<(fneg (f64 (fma (fneg DPR:$Ddin), DPR:$Dn, DPR:$Dm))), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(fneg (f32 (fma (fneg SPR:$Sdin), SPR:$Sn, SPR:$Sm))), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; +// (fma x, (fneg y), z) -> (vnfms x, y, z) +def : Pat<(f64 (fma DPR:$Ddin, (fneg DPR:$Dn), DPR:$Dm)), + (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, + Requires<[HasVFP4]>; +def : Pat<(f32 (fma SPR:$Sdin, (fneg SPR:$Sn), SPR:$Sm)), + (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, + Requires<[HasVFP4]>; //===----------------------------------------------------------------------===// // FP Conditional moves. 
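The blocks above do two separable things: the vmla/vmls-family patterns move from the ad-hoc NoVFP4 guard to the explicit DontUseFusedMAC predicate (and the vfma family to UseFusedMAC), and new patterns lower the @llvm.fma.* intrinsic directly, guarded only by HasVFP4 since the intrinsic itself requests single-rounding semantics. The fneg-folding comments rely on sign symmetries of fma(x, y, z) = x*y + z that hold exactly in IEEE arithmetic. A minimal standalone C++ sketch of that algebra, using std::fma with illustrative values (this demonstrates the identities only, not the exact TableGen operand mapping):

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.625;
  // A negation can move between the two multiplicands: both sides are z - x*y.
  assert(std::fma(-x, y, z) == std::fma(x, -y, z));
  // Negating the whole fma equals negating product and addend: -(x*y + z).
  assert(-std::fma(x, y, z) == std::fma(-x, y, -z));
  // Likewise x*y - z can be written as a negated fma of the negated operand.
  assert(-std::fma(-x, y, z) == std::fma(x, y, -z));
  return 0;
}

The identities are exact because fneg only flips the sign bit and IEEE round-to-nearest commutes with negation; that is what lets the selector fold an fneg into the vfms/vfnma/vfnms forms instead of emitting a separate vneg.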
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 753e578..c5db211 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "jit" #include "ARMJITInfo.h" -#include "ARMInstrInfo.h" +#include "ARM.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0f6dc04..9ef2ace 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -15,8 +15,8 @@ #define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" -#include "ARMRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -93,7 +93,9 @@ namespace { bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); + DebugLoc dl, + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs); void MergeOpsUpdate(MachineBasicBlock &MBB, MemOpQueue &MemOps, unsigned memOpsBegin, @@ -282,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, DebugLoc dl, - SmallVector<std::pair<unsigned, bool>, 8> &Regs) { + ArrayRef<std::pair<unsigned, bool> > Regs, + ArrayRef<unsigned> ImpDefs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) @@ -350,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); + // Add implicit defs for super-registers. + for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i) + MIB.addReg(ImpDefs[i], RegState::ImplicitDefine); + return true; } @@ -384,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, } SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 8> ImpDefs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { unsigned Reg = memOps[i].Reg; // If we are inserting the merged operation after an operation that // uses the same register, make sure to transfer any kill flag. bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); + + // Collect any implicit defs of super-registers. They must be preserved. + for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead()) + continue; + unsigned DefReg = MO->getReg(); + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) + ImpDefs.push_back(DefReg); + } } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs)) + Pred, PredReg, Scratch, dl, Regs, ImpDefs)) return; // Merge succeeded, update records. 
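The ARMLoadStoreOptimizer hunks above change MergeOps to take its register list, plus a new list of implicit super-register defs, as ArrayRef parameters, which accept any contiguous container without hard-coding a SmallVector small size; MergeOpsUpdate now gathers those implicit defs with a std::find-based dedup so the merged LDM/STM can re-declare them via RegState::ImplicitDefine. A self-contained C++ sketch of that collect-and-deduplicate step, with plain std:: containers and a made-up MemOp type standing in for the machine-IR types:

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for one load/store in the merge candidate queue;
// only its implicit super-register defs matter for this sketch.
struct MemOp {
  std::vector<unsigned> ImplicitDefs;
};

// Mirrors the collection loop: record each implicit def exactly once so the
// merged instruction can preserve every super-register definition.
static std::vector<unsigned> collectImpDefs(const std::vector<MemOp> &MemOps) {
  std::vector<unsigned> ImpDefs;
  for (unsigned i = 0, e = MemOps.size(); i != e; ++i) {
    const std::vector<unsigned> &Defs = MemOps[i].ImplicitDefs;
    for (unsigned j = 0, f = Defs.size(); j != f; ++j)
      if (std::find(ImpDefs.begin(), ImpDefs.end(), Defs[j]) == ImpDefs.end())
        ImpDefs.push_back(Defs[j]);
  }
  return ImpDefs;
}

int main() {
  // Two candidates implicitly define the same super-register (think two
  // D-register loads that together cover one Q register); a third defines
  // another. Each register must appear exactly once in the result.
  MemOp A, B, C;
  A.ImplicitDefs.push_back(300);
  B.ImplicitDefs.push_back(300);
  C.ImplicitDefs.push_back(301);
  std::vector<MemOp> Ops;
  Ops.push_back(A);
  Ops.push_back(B);
  Ops.push_back(C);
  std::vector<unsigned> ImpDefs = collectImpDefs(Ops);
  for (unsigned i = 0; i != ImpDefs.size(); ++i)
    std::printf("implicit-def %u\n", ImpDefs[i]); // prints 300, then 301
  return 0;
}

Dropping a super-register implicit def during the merge would make the wider register look dead to later passes, so this is a correctness fix rather than tidying; the linear std::find is fine because a merge batch only ever holds a handful of registers.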
@@ -537,7 +554,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -570,7 +587,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && (MI->getOperand(2).getImm()*Scale) == Bytes && - llvm::getInstrPredicate(MI, MyPredReg) == Pred && + getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -701,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, bool BaseKill = MI->getOperand(0).isKill(); unsigned Bytes = getLSMultipleTransferSize(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); @@ -854,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); bool DoMerge = false; ARM_AM::AddrOpc AddSub = ARM_AM::add; unsigned NewOpc = 0; @@ -1112,7 +1129,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); int OffImm = getMemoryOpOffset(MI); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); if (OddRegNum > EvenRegNum && OffImm == 0) { // Ascending register numbers and no offset. It's safe to change it to a @@ -1143,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc = (isLd) ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); + // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset, + // so adjust and use t2LDRi12 here for that. + unsigned NewOpc2 = (isLd) + ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) + : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); DebugLoc dl = MBBI->getDebugLoc(); // If this is a load and base register is killed, it may have been // re-defed by the load, make sure the first load does not clobber it. @@ -1150,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, (BaseKill || OffKill) && (TRI->regsOverlap(EvenReg, BaseReg))) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); + if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0) + NewOpc = ARM::t2LDRi12; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1167,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenDeadKill = false; OddDeadKill = true; } + // Never kill the base register in the first instruction. 
+ // <rdar://problem/11101911> + if (EvenReg == BaseReg) + EvenDeadKill = false; InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); NewBBI = llvm::prior(MBBI); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, + InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, Pred, PredReg, TII, isT2); @@ -1223,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; - ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] @@ -1599,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (EvenReg == OddReg) return false; BaseReg = Op0->getOperand(1).getReg(); - Pred = llvm::getInstrPredicate(Op0, PredReg); + Pred = getInstrPredicate(Op0, PredReg); dl = Op0->getDebugLoc(); return true; } @@ -1796,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; - if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL) + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) continue; int Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 1327fb8..1466e98 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -314,7 +314,8 @@ def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1], def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, (interleave QPR, TuplesOE2D)> { // Allocate starting at non-VFP2 registers D16-D31 first. - let AltOrders = [(rotl DPair, 16)]; + // Prefer even-odd pairs as they are easier to copy. 
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))]; let AltOrderSelect = [{ return 1; }]; } diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 8d86c01..8b1fb93 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<19, [A8_NPipe], 0>, InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<20, [A8_NPipe], 0>, @@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, // + // Double-register Fused FP Multiply-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register Fused FP Multiply-Accumulate + // Result written in N9, but that is relative to the last cycle of a multicycle operation, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // // Double-register Reciprocal Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 49fedf6..0d710cc 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<2, [A9_NPipe]>], [9, 1, 1, 1]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_NPipe]>], + [8, 1, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [9, 1, 1, 1]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, @@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // + // Double-register Fused FP Multiply-Accumulate + InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe]>], + [6, 3, 2, 1]>, + // + // Quad-register Fused FP Multiply-Accumulate + // Result written in N9, but that is relative to the last cycle of a multicycle operation, + // so we use 10 for those cases + InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<4, 
[A9_NPipe]>], + [8, 4, 2, 1]>, + // // Double-register Reciprocal Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 4d959f5..0ace9bc 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries< // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // + // Single-precision Fused FP MAC + InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, + // + // Double-precision Fused FP MAC + InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, + // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1e8cda5..ca172ed 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,7 +16,6 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -49,7 +48,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , HasVFPv3(false) , HasVFPv4(false) , HasNEON(false) - , HasNEON2(false) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) , HasVMLxForwarding(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3d9c03d..e72b06f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -45,13 +45,12 @@ protected: bool HasV6T2Ops; bool HasV7Ops; - /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON, HasNEONVFPv4 - Specify what + /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what /// floating point ISAs are supported. bool HasVFPv2; bool HasVFPv3; bool HasVFPv4; bool HasNEON; - bool HasNEON2; /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to @@ -205,7 +204,6 @@ protected: bool hasVFP3() const { return HasVFPv3; } bool hasVFP4() const { return HasVFPv4; } bool hasNEON() const { return HasNEON; } - bool hasNEON2() const { return HasNEON2 || (HasNEON && HasVFPv4); } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 44229ad..047efc2 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -158,8 +158,10 @@ bool ARMPassConfig::addPreRegAlloc() { bool ARMPassConfig::addPreSched2() { // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) + if (!getARMSubtarget().isThumb1Only()) { PM.add(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + } if (getARMSubtarget().hasNEON()) PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } @@ -192,7 +194,8 @@ bool ARMPassConfig::addPreEmitPass() { return true; } -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { +bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) { // Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index eb8aaf2..fda8536 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -17,8 +17,6 @@ #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" #include <string> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 911eb13..2c53e3f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -82,8 +82,14 @@ class ARMAsmParser : public MCTargetAsmParser { MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool Warning(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Warning(L, Msg, Ranges); + } + bool Error(SMLoc L, const Twine &Msg, + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + return Parser.Error(L, Msg, Ranges); + } int tryParseRegister(); bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); @@ -478,6 +484,8 @@ public: /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const { return EndLoc; } + SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -579,6 +587,14 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= 0 && Value <= 508; } + bool isImm0_508s4Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + // explicitly exclude zero. we want that to use the normal 0_508 version. + return ((Value & 3) == 0) && Value > 0 && Value <= 508; + } bool isImm0_255() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -586,6 +602,20 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 256; } + bool isImm0_4095() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 4096; + } + bool isImm0_4095Neg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = -CE->getValue(); + return Value > 0 && Value < 4096; + } bool isImm0_1() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -782,7 +812,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. 
+ return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; } bool isT2SOImm() const { if (!isImm()) return false; @@ -803,7 +835,9 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - return ARM_AM::getT2SOImmVal(-Value) != -1; + // Only use this when not representable as a plain so_imm. + return ARM_AM::getT2SOImmVal(Value) == -1 && + ARM_AM::getT2SOImmVal(-Value) != -1; } bool isSetEndImm() const { if (!isImm()) return false; @@ -1495,6 +1529,14 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } + void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate is scaled by four in the encoding and is stored + // in the MCInst as such. Lop off the low two bits here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4))); + } + void addImm0_508s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate is scaled by four in the encoding and is stored @@ -1553,6 +1595,14 @@ public: Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); } + void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually an imm0_4095, but we have its + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); + } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a so_imm, but we have its bitwise @@ -3324,7 +3374,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { FlagsVal = 8; // No flag } } else if (SpecReg == "cpsr" || SpecReg == "spsr") { - if (Flags == "all") // cpsr_all is an alias for cpsr_fc + // cpsr_all is an alias for cpsr_fc, as is plain cpsr. + if (Flags == "all" || Flags == "") Flags = "fc"; for (int i = 0, e = Flags.size(); i != e; ++i) { unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1)) @@ -4475,22 +4526,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, case AsmToken::Dollar: case AsmToken::Hash: { // #42 -> immediate. 
- // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate S = Parser.getTok().getLoc(); Parser.Lex(); - bool isNegative = Parser.getTok().is(AsmToken::Minus); - const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) - return true; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); - if (CE) { - int32_t Val = CE->getValue(); - if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + + if (Parser.getTok().isNot(AsmToken::Colon)) { + bool isNegative = Parser.getTok().is(AsmToken::Minus); + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) + return true; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal); + if (CE) { + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); + } + E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); + Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); + return false; } - E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); - return false; + // w/ a ':' after the '#', it's just like a plain ':'. + // FALLTHROUGH } case AsmToken::Colon: { // ":lower16:" and ":upper16:" expression prefixes @@ -4616,6 +4671,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" || Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" || Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" || + Mnemonic == "vfms" || Mnemonic == "vfnms" || (Mnemonic == "movs" && isThumb()))) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1); CarrySetting = true; @@ -4659,6 +4715,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "orr" || Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" || + Mnemonic == "vfm" || Mnemonic == "vfnm" || (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" || Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"))) { @@ -4727,7 +4784,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[4])->isReg() && static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[5])->isReg() || + ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) || static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the @@ -4811,7 +4868,10 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, (Operands.size() == 5 || Operands.size() == 6) && static_cast<ARMOperand*>(Operands[3])->isReg() && static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + (static_cast<ARMOperand*>(Operands[4])->isImm() || + (Operands.size() == 6 && + static_cast<ARMOperand*>(Operands[5])->isImm()))) return true; return false; @@ -6602,6 +6662,37 @@ processInstruction(MCInst &Inst, return true; } + // Handle encoding choice for the shift-immediate instructions. 
+ case ARM::t2LSLri: + case ARM::t2LSRri: + case ARM::t2ASRri: { + if (isARMLowRegister(Inst.getOperand(0).getReg()) && + Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !(static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + unsigned NewOpc; + switch (Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case ARM::t2LSLri: NewOpc = ARM::tLSLri; break; + case ARM::t2LSRri: NewOpc = ARM::tLSRri; break; + case ARM::t2ASRri: NewOpc = ARM::tASRri; break; + } + // The Thumb1 operands aren't in the same order. Awesome, eh? + MCInst TmpInst; + TmpInst.setOpcode(NewOpc); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(1)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } + return false; + } + // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: case ARM::t2MOVSsr: { @@ -6833,7 +6924,7 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tADDi3); return true; } @@ -6843,11 +6934,37 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if <Rd> is specified and encoding T2 is preferred // to encoding T1 if <Rd> is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { + if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tSUBi3); return true; } break; + case ARM::t2ADDri: + case ARM::t2SUBri: { + // If the destination and first source operand are the same, and + // the flags are compatible with the current IT status, use encoding T2 + // instead of T3. For compatibility with the system 'as'. Make sure the + // wide encoding wasn't explicit. + if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + !isARMLowRegister(Inst.getOperand(0).getReg()) || + (unsigned)Inst.getOperand(2).getImm() > 255 || + ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || + (inITBlock() && Inst.getOperand(5).getReg() != 0)) || + (static_cast<ARMOperand*>(Operands[3])->isToken() && + static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + break; + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? + ARM::tADDi8 : ARM::tSUBi8); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(5)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); + Inst = TmpInst; + return true; + } case ARM::t2ADDrr: { // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. @@ -6964,7 +7081,7 @@ processInstruction(MCInst &Inst, // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. 
if (isARMLowRegister(Inst.getOperand(0).getReg()) && - Inst.getOperand(1).getImm() <= 255 && + (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR) || (inITBlock() && Inst.getOperand(4).getReg() == 0)) && @@ -7216,7 +7333,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, return Error(ErrorLoc, "invalid operand for instruction"); } case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + return Error(IDLoc, "invalid instruction", + ((ARMOperand*)Operands[0])->getLocRange()); case Match_ConversionFail: // The converter function will have already emited a diagnostic. return true; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ce4587b..912935d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -9,8 +9,6 @@ #define DEBUG_TYPE "arm-disassembler" -#include "ARM.h" -#include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -20,6 +18,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" @@ -103,228 +102,232 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { // Forward declare these because the autogenerated code will reference them. // Definitions are further down. -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); 
-static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst, unsigned Insn, uint64_t Adddress, const void *Decoder); -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, 
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, +static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void 
*Decoder);
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+                                  uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
                                   uint64_t Address, const void *Decoder);
-
-
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+                                  uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
+                                  uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
@@ -856,7 +859,7 @@ static const
uint16_t GPRDecoderTable[] = { ARM::R12, ARM::SP, ARM::LR, ARM::PC }; -static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -867,7 +870,7 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -879,14 +882,14 @@ DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return S; } -static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); } -static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; switch (RegNo) { @@ -916,7 +919,7 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail; return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder); @@ -933,7 +936,7 @@ static const uint16_t SPRDecoderTable[] = { ARM::S28, ARM::S29, ARM::S30, ARM::S31 }; -static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -954,7 +957,7 @@ static const uint16_t DPRDecoderTable[] = { ARM::D28, ARM::D29, ARM::D30, ARM::D31 }; -static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -964,7 +967,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 7) return MCDisassembler::Fail; @@ -972,7 +975,7 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo, +DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 15) return MCDisassembler::Fail; @@ -987,7 +990,7 @@ static const uint16_t QPRDecoderTable[] = { }; -static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; @@ -1007,7 +1010,7 @@ static const uint16_t DPairDecoderTable[] = { ARM::Q15 }; -static DecodeStatus 
DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { if (RegNo > 30) return MCDisassembler::Fail; @@ -1028,7 +1031,7 @@ static const uint16_t DPairSpacedDecoderTable[] = { ARM::D28_D30, ARM::D29_D31 }; -static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, +static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { @@ -1040,7 +1043,7 @@ static DecodeStatus DecodeDPairSpacedRegisterClass(llvm::MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xF) return MCDisassembler::Fail; // AL predicate is not allowed on Thumb1 branches. @@ -1054,7 +1057,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val) Inst.addOperand(MCOperand::CreateReg(ARM::CPSR)); @@ -1063,7 +1066,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { uint32_t imm = Val & 0xFF; uint32_t rot = (Val & 0xF00) >> 7; @@ -1072,7 +1075,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1109,7 +1112,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1144,7 +1147,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1179,7 +1182,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1196,7 +1199,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1213,7 +1216,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus 
DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { // This operand encodes a mask of contiguous zeros between a specified MSB // and LSB. To decode it, we create the mask of all bits MSB-and-lower, @@ -1234,7 +1237,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1379,7 +1382,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1482,7 +1485,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1523,7 +1526,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, +DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1536,6 +1539,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, unsigned pred = fieldFromInstruction32(Insn, 28, 4); unsigned W = fieldFromInstruction32(Insn, 21, 1); unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt2 = Rt + 1; bool writeback = (W == 1) | (P == 0); @@ -1547,7 +1551,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: - if (Rt & 0x1) return MCDisassembler::Fail; + if (Rt & 0x1) S = MCDisassembler::SoftFail; + break; + default: + break; + } + switch (Inst.getOpcode()) { + case ARM::STRD: + case ARM::STRD_PRE: + case ARM::STRD_POST: + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + if (type && Rm == 15) + S = MCDisassembler::SoftFail; + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + if (!type && fieldFromInstruction32(Insn, 8, 4)) + S = MCDisassembler::SoftFail; + break; + case ARM::STRH: + case ARM::STRH_PRE: + case ARM::STRH_POST: + if (Rt == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRD: + case ARM::LDRD_PRE: + case ARM::LDRD_POST: + if (type && Rn == 15){ + if (Rt2 == 15) + S = MCDisassembler::SoftFail; + break; + } + if (P == 0 && W == 1) + S = MCDisassembler::SoftFail; + if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2)) + S = MCDisassembler::SoftFail; + if (!type && writeback && Rn == 15) + S = MCDisassembler::SoftFail; + if (writeback && (Rn == Rt || Rn == Rt2)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRH: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (Rt == 15) + S = 
MCDisassembler::SoftFail; + if (!type && Rm == 15) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; + break; + case ARM::LDRSH: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRSB: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + if (type && Rn == 15){ + if (Rt == 15) + S = MCDisassembler::SoftFail; + break; + } + if (type && (Rt == 15 || (writeback && Rn == Rt))) + S = MCDisassembler::SoftFail; + if (!type && (Rt == 15 || Rm == 15)) + S = MCDisassembler::SoftFail; + if (!type && writeback && (Rn == 15 || Rn == Rt)) + S = MCDisassembler::SoftFail; break; default: break; @@ -1634,7 +1717,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1663,7 +1746,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, +static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1748,7 +1831,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst, return S; } -static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 18, 2); unsigned M = fieldFromInstruction32(Insn, 17, 1); @@ -1788,7 +1871,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction32(Insn, 9, 2); unsigned M = fieldFromInstruction32(Insn, 8, 1); @@ -1828,7 +1911,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1852,7 +1935,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1878,7 +1961,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1906,7 +1989,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = 
MCDisassembler::Success; @@ -1926,7 +2009,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1945,13 +2028,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); } static DecodeStatus -DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2BInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | + (fieldFromInstruction32(Insn, 11, 1) << 18) | + (fieldFromInstruction32(Insn, 13, 1) << 17) | + (fieldFromInstruction32(Insn, 16, 6) << 11) | + (fieldFromInstruction32(Insn, 26, 1) << 19); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); + return S; +} + +static DecodeStatus +DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1977,7 +2075,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -1994,7 +2092,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2183,6 +2281,8 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD2b8wb_register: case ARM::VLD2b16wb_register: case ARM::VLD2b32wb_register: + Inst.addOperand(MCOperand::CreateImm(0)); + break; case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: @@ -2251,12 +2351,22 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; break; + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + break; } return S; } -static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2319,6 +2429,8 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST2b8wb_register: case ARM::VST2b16wb_register: case ARM::VST2b32wb_register: + if (Rm == 0xF) + return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(0)); break; case ARM::VST3d8_UPD: 
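The new DecodeT2BInstruction above reassembles the Thumb2 branch immediate from its scattered encoding fields before halfword-scaling and sign-extending it. Below is a standalone sketch of the same reassembly; fieldFromInsn and signExtend are local stand-ins for LLVM's fieldFromInstruction32 and SignExtend32, and the encoding value fed to main is made up:

#include <cstdint>
#include <cstdio>

// Local stand-in for fieldFromInstruction32(): extract Size bits at Start.
static uint32_t fieldFromInsn(uint32_t Insn, unsigned Start, unsigned Size) {
  return (Insn >> Start) & ((1u << Size) - 1);
}

// Local stand-in for SignExtend32<N>().
template <unsigned N> static int32_t signExtend(uint32_t X) {
  return int32_t(X << (32 - N)) >> (32 - N);
}

// Mirrors the hunk above: instruction bits [10:0], [11], [13], [21:16] and
// [26] land at offsets 0, 18, 17, 11 and 19 of a 20-bit immediate, which is
// then shifted left one bit (halfword units) and sign-extended.
static int32_t t2BranchOffset(uint32_t Insn) {
  uint32_t imm = (fieldFromInsn(Insn,  0, 11) <<  0) |
                 (fieldFromInsn(Insn, 11,  1) << 18) |
                 (fieldFromInsn(Insn, 13,  1) << 17) |
                 (fieldFromInsn(Insn, 16,  6) << 11) |
                 (fieldFromInsn(Insn, 26,  1) << 19);
  return signExtend<20>(imm << 1);
}

int main() {
  uint64_t Address = 0x8000; // hypothetical PC of the branch
  printf("target = 0x%llx\n",
         (unsigned long long)(Address + t2BranchOffset(0xF43F2FFEu) + 4));
  return 0;
}

The decoder itself adds the same "+ 4" pipeline adjustment when it forms the symbolic target address.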
@@ -2525,7 +2637,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2570,7 +2682,7 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2580,7 +2692,6 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned align = fieldFromInstruction32(Insn, 4, 1); unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); - unsigned pred = fieldFromInstruction32(Insn, 22, 4); align *= 2*size; switch (Inst.getOpcode()) { @@ -2611,20 +2722,15 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(align)); - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { + if (Rm != 0xD && Rm != 0xF) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; } - if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) - return MCDisassembler::Fail; - return S; } -static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2659,7 +2765,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2712,7 +2818,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2757,7 +2863,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2776,31 +2882,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(8 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(16 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, 
unsigned Val, +static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(32 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateImm(64 - Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2836,7 +2942,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2860,25 +2966,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, + true, 4, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2893,7 +3005,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2907,7 +3019,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned imm = Val << 2; @@ -2917,7 +3029,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned 
Val, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateImm(Val)); @@ -2925,7 +3037,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2942,7 +3054,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2997,7 +3109,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (!(Val & 0x100)) imm *= -1; @@ -3006,7 +3118,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3021,7 +3133,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, +static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3036,7 +3148,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val, return S; } -static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { int imm = Val & 0xFF; if (Val == 0) @@ -3049,7 +3161,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3080,7 +3192,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val, return S; } -static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3110,7 +3222,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3125,7 +3237,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val, } -static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, +static 
DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imm = fieldFromInstruction16(Insn, 0, 7); @@ -3136,7 +3248,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3161,7 +3273,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn, return S; } -static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, +static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; unsigned flags = fieldFromInstruction16(Insn, 0, 3); @@ -3172,20 +3284,20 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rm = fieldFromInstruction32(Insn, 0, 4); unsigned add = fieldFromInstruction32(Insn, 4, 1); - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(add)); return S; } -static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!tryAddingSymbolicOperand(Address, (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, @@ -3194,7 +3306,7 @@ static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val == 0xA || Val == 0xB) return MCDisassembler::Fail; @@ -3204,7 +3316,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, +DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3220,7 +3332,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3262,7 +3374,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn, // Decode a shifted immediate operand. These basically consist // of an 8-bit value, and a 4-bit directive that specifies either // a splat operation or a rotation. 
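The comment above is the whole trick behind DecodeT2SOImm, whose body follows in the next hunk: the 12-bit field either splats a byte into selected lanes or rotates an 8-bit value with an implied high bit. As a concrete reference, here is a standalone sketch of that expansion following the ARM ARM's ThumbExpandImm pseudocode; the function name and interface are local assumptions, not LLVM's:

#include <cstdint>
#include <cstdio>

static uint32_t thumbExpandImm(uint32_t imm12) {
  uint32_t byte = imm12 & 0xFF;
  if (((imm12 >> 10) & 0x3) == 0) {
    // Splat directives: imm12<9:8> selects which lanes get the byte.
    switch ((imm12 >> 8) & 0x3) {
    case 0:  return byte;                                        // 000000XY
    case 1:  return (byte << 16) | byte;                         // 00XY00XY
    case 2:  return (byte << 24) | (byte << 8);                  // XY00XY00
    default: return (byte << 24) | (byte << 16) | (byte << 8) | byte;
    }
  }
  // Rotation directive: an 8-bit value with the high bit forced, rotated
  // right by the 5-bit amount in imm12<11:7> (always >= 4 on this path).
  uint32_t unrotated = 0x80 | (imm12 & 0x7F);
  unsigned rot = (imm12 >> 7) & 0x1F;
  return (unrotated >> rot) | (unrotated << ((32 - rot) & 31));
}

int main() {
  printf("0x%08x\n", thumbExpandImm(0x1AB)); // 0x00AB00AB (splat)
  printf("0x%08x\n", thumbExpandImm(0x4FF)); // 0x7F800000 (ROR(0xFF, 9))
  return 0;
}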
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { unsigned ctrl = fieldFromInstruction32(Val, 10, 2); if (ctrl == 0) { @@ -3294,13 +3406,15 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val, } static DecodeStatus -DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val, +DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ - Inst.addOperand(MCOperand::CreateImm(Val << 1)); + if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4, + true, 2, Inst, Decoder)) + Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1))); return MCDisassembler::Success; } -static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder){ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4, true, 4, Inst, Decoder)) @@ -3308,7 +3422,7 @@ static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { switch (Val) { default: @@ -3328,14 +3442,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val, +static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (!Val) return MCDisassembler::Fail; Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } -static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3358,7 +3472,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; @@ -3385,7 +3499,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3410,7 +3524,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3438,7 +3552,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3463,7 +3577,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, 
unsigned Insn, +static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3488,7 +3602,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3547,7 +3661,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3605,7 +3719,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3672,7 +3786,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3736,7 +3850,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3806,7 +3920,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3870,7 +3984,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -3944,7 +4058,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4009,7 +4123,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4035,7 +4149,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned Rt = fieldFromInstruction32(Insn, 12, 4); @@ -4061,7 +4175,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus 
DecodeIT(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; unsigned pred = fieldFromInstruction16(Insn, 4, 4); @@ -4088,7 +4202,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4125,7 +4239,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, } static DecodeStatus -DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, +DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4159,7 +4273,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, +static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); @@ -4174,7 +4288,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn, return MCDisassembler::Success; } -static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, +static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4185,7 +4299,7 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, return S; } -static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Rt = fieldFromInstruction32(Insn, 12, 4); unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); @@ -4196,6 +4310,10 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return DecodeCPSInstruction(Inst, Insn, Address, Decoder); DecodeStatus S = MCDisassembler::Success; + + if (Rt == Rn || Rn == Rt2) + S = MCDisassembler::SoftFail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) @@ -4208,7 +4326,7 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4236,7 +4354,7 @@ static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, return S; } -static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); @@ -4263,3 +4381,59 @@ static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, return S; } + +static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction32(Val, 16, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rm = 
fieldFromInstruction32(Val, 0, 4); + Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction32(Val, 28, 4); + + if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + S = MCDisassembler::SoftFail; + + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} + +static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + DecodeStatus S = MCDisassembler::Success; + + unsigned CRm = fieldFromInstruction32(Val, 0, 4); + unsigned opc1 = fieldFromInstruction32(Val, 4, 4); + unsigned cop = fieldFromInstruction32(Val, 8, 4); + unsigned Rt = fieldFromInstruction32(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + + if ((cop & ~0x1) == 0xa) + return MCDisassembler::Fail; + + if (Rt == Rt2) + S = MCDisassembler::SoftFail; + + Inst.addOperand(MCOperand::CreateImm(cop)); + Inst.addOperand(MCOperand::CreateImm(opc1)); + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(CRm)); + + return S; +} + diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 94075a9..52d8338 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MC Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2b994df..cbd81c1 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,11 +18,11 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. @@ -36,17 +36,14 @@ static unsigned translateShiftImm(unsigned imm) { ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. setAvailableFeatures(STI.getFeatureBits()); } -StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } @@ -212,12 +209,12 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); // If a symbolic branch target was added as a constant expression then print - // that address in hex. 
+ // that address in hex. And only print 32 unsigned bits for the address. const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr()); int64_t Address; if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { O << "0x"; - O.write_hex(Address); + O.write_hex((uint32_t)Address); } else { // Otherwise, just print the expression. diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index e9cd407..8acb7ee 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 25849ee..d10bfc1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,11 +11,11 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" @@ -78,7 +78,8 @@ public: { "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_bl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, @@ -115,6 +116,9 @@ public: // twiddled. 
if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 && + (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && + (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { if (A) { const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); @@ -128,7 +132,8 @@ public: if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx || (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl || (unsigned)Fixup.getKind() == ARM::fixup_arm_blx || - (unsigned)Fixup.getKind() == ARM::fixup_arm_bl)) + (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl || + (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl)) IsResolved = false; } @@ -366,7 +371,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. @@ -466,7 +472,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { Value = -Value; isAdd = false; } + // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value = (Value & 0xf) | ((Value & 0xf0) << 4); return Value | (isAdd << 23); } case ARM::fixup_arm_pcrel_10: @@ -577,7 +585,8 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 5476a46..aa649ba 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -177,7 +178,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbranch: switch (Modifier) { @@ -189,6 +190,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, break; } break; + case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: Type = ELF::R_ARM_JUMP24; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 1827986..0085feb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -59,8 +59,21 @@ enum Fixups { // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. fixup_arm_thumb_br, - // fixup_arm_bl - Fixup for ARM BL instructions. - fixup_arm_bl, + // The following fixups handle the ARM BL instructions. These can be + // conditionalised; however, the ARM ELF ABI requires a different relocation + // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. 
The difference is that + // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has + // no conditional version; R_ARM_JUMP24 would have to insert a veneer. + // + // MachO does not draw a distinction between the two cases, so it will treat + // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups. + + // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions. + fixup_arm_uncondbl, + + // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial + // conditionalisation. + fixup_arm_condbl, // fixup_arm_blx - Fixup for ARM BLX instructions. fixup_arm_blx, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 4445dcd..10d1c48 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -597,8 +597,12 @@ uint32_t ARMMCCodeEmitter:: getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand MO = MI.getOperand(OpIdx); - if (MO.isExpr()) - return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_bl, Fixups); + if (MO.isExpr()) { + if (HasConditionalBranch(MI)) + return ::getBranchTargetOpValue(MI, OpIdx, + ARM::fixup_arm_condbl, Fixups); + return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups); + } return MO.getImm() >> 2; } @@ -1330,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, // LDM/STM: // {15-0} = Bitfield of GPRs. unsigned Reg = MI.getOperand(Op).getReg(); - bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); - bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); + bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg); + bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg); unsigned Binary = 0; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index ed27f9f..e3512cd 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -163,10 +163,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createARMMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI, MRI, STI); + return new ARMInstPrinter(MAI, MII, MRI, STI); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 9d3da14..8057cb6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -82,7 +82,8 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - case ARM::fixup_arm_bl: + case ARM::fixup_arm_uncondbl: + case ARM::fixup_arm_condbl: case ARM::fixup_arm_blx: RelocType = unsigned(macho::RIT_ARM_Branch24Bit); // Report as 'long', even though that is not quite accurate. diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 4fcaecf..3eddda8 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions. 
//===---------------------------------------------------------------------===// -ARM::MOVCCr is commutable (by flipping the condition). But we need to implement -ARMInstrInfo::commuteInstruction() to support it. - -//===---------------------------------------------------------------------===// - Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g. ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg) diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 8cf7cac..e03e758 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 27fce9b..36af204 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -17,7 +17,6 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index def75dd..ecb4c2f 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -154,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI, ++I; if (I != E) { unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg); if (NCC == CC || NCC == OCC) return true; } @@ -171,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MachineInstr *MI = &*MBBI; DebugLoc dl = MI->getDebugLoc(); unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg); + ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg); if (CC == ARMCC::AL) { ++MBBI; continue; @@ -207,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { MI = NMI; unsigned NPredReg = 0; - ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg); + ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg); if (NCC == CC || NCC == OCC) { Mask |= (NCC & 1) << Pos; // Add implicit use of ITSTATE. diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2fe4b85..8ab486b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" @@ -59,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If the first instruction of Tail is predicated, we may have to update // the IT instruction. unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg); MachineBasicBlock::iterator MBBI = Tail; if (CC != ARMCC::AL) // Expecting at least the t2IT instruction before it. 
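
// A simplified model (hypothetical types, not the pass's real API) of the
// grouping rule Thumb2ITBlockPass applies above: an IT block covers up to
// four consecutive instructions whose predicate is either the leader's
// condition CC or its opposite OCC, which ARMCC encodes as CC with the low
// bit flipped -- hence the "Mask |= (NCC & 1) << Pos" step in the pass.
#include <vector>

enum CondCode { EQ = 0, NE = 1, AL = 14 };  // small subset for the sketch
struct Instr { CondCode Pred; };

unsigned itBlockLength(const std::vector<Instr> &Block, unsigned I) {
  CondCode CC = Block[I].Pred;
  if (CC == AL) return 0;                          // unpredicated: no IT block
  CondCode OCC = static_cast<CondCode>(CC ^ 1);    // opposite condition
  unsigned Len = 1;
  while (Len < 4 && I + Len < Block.size()) {      // architectural max is 4
    CondCode NCC = Block[I + Len].Pred;
    if (NCC != CC && NCC != OCC) break;
    ++Len;
  }
  return Len;
}
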
@@ -107,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, } unsigned PredReg = 0; - return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; + return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -574,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, return; unsigned PredReg = 0; - ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg); + ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); if (CC == ARMCC::AL || PredReg != ARM::CPSR) return; @@ -590,7 +589,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, continue; MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg); + ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); if (!(NCC == CC || NCC == OCC) || NMI->modifiesRegister(SrcReg, &TRI) || NMI->modifiesRegister(ARM::CPSR, &TRI)) @@ -611,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::tBcc || Opc == ARM::t2Bcc) return ARMCC::AL; - return llvm::getInstrPredicate(MI, PredReg); + return getInstrPredicate(MI, PredReg); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 1ae2ef1..0911f8a 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -15,9 +15,8 @@ #define THUMB2INSTRUCTIONINFO_H #include "ARM.h" -#include "ARMInstrInfo.h" +#include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index fb9d93b..b5a397e 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -851,7 +851,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // If this BB loops back to itself, conservatively avoid narrowing the // first instruction that does partial flag update. bool IsSelfLoop = MBB.isSuccessor(&MBB); - MachineBasicBlock::instr_iterator MII = MBB.instr_begin(), E = MBB.instr_end(); + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end(); MachineBasicBlock::instr_iterator NextMII; for (; MII != E; MII = NextMII) { NextMII = llvm::next(MII); diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp deleted file mode 100644 index b6b209e..0000000 --- a/lib/Target/CBackend/CBackend.cpp +++ /dev/null @@ -1,3616 +0,0 @@ -//===-- CBackend.cpp - Library for converting LLVM code to C --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This library converts LLVM code to C code, compilable by GCC and other C -// compilers. 
-// -//===----------------------------------------------------------------------===// - -#include "CTargetMachine.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/PassManager.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/InlineAsm.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ConstantsScanner.h" -#include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/IntrinsicLowering.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/Host.h" -#include "llvm/Config/config.h" -#include <algorithm> -// Some ms header decided to define setjmp as _setjmp, undo this for this file. -#ifdef _MSC_VER -#undef setjmp -#endif -using namespace llvm; - -extern "C" void LLVMInitializeCBackendTarget() { - // Register the target. - RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget); -} - -namespace { - class CBEMCAsmInfo : public MCAsmInfo { - public: - CBEMCAsmInfo() { - GlobalPrefix = ""; - PrivateGlobalPrefix = ""; - } - }; - - /// CWriter - This class is the main chunk of code that converts an LLVM - /// module to a C translation unit. - class CWriter : public FunctionPass, public InstVisitor<CWriter> { - formatted_raw_ostream &Out; - IntrinsicLowering *IL; - Mangler *Mang; - LoopInfo *LI; - const Module *TheModule; - const MCAsmInfo* TAsm; - const MCRegisterInfo *MRI; - const MCObjectFileInfo *MOFI; - MCContext *TCtx; - const TargetData* TD; - - std::map<const ConstantFP *, unsigned> FPConstantMap; - std::set<Function*> intrinsicPrototypesAlreadyGenerated; - std::set<const Argument*> ByValParams; - unsigned FPCounter; - unsigned OpaqueCounter; - DenseMap<const Value*, unsigned> AnonValueNumbers; - unsigned NextAnonValueNumber; - - /// UnnamedStructIDs - This contains a unique ID for each struct that is - /// either anonymous or has no name. - DenseMap<StructType*, unsigned> UnnamedStructIDs; - - public: - static char ID; - explicit CWriter(formatted_raw_ostream &o) - : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0), - OpaqueCounter(0), NextAnonValueNumber(0) { - initializeLoopInfoPass(*PassRegistry::getPassRegistry()); - FPCounter = 0; - } - - virtual const char *getPassName() const { return "C backend"; } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<LoopInfo>(); - AU.setPreservesAll(); - } - - virtual bool doInitialization(Module &M); - - bool runOnFunction(Function &F) { - // Do not codegen any 'available_externally' functions at all, they have - // definitions outside the translation unit. 
- if (F.hasAvailableExternallyLinkage()) - return false; - - LI = &getAnalysis<LoopInfo>(); - - // Get rid of intrinsics we can't handle. - lowerIntrinsics(F); - - // Output all floating point constants that cannot be printed accurately. - printFloatingPointConstants(F); - - printFunction(F); - return false; - } - - virtual bool doFinalization(Module &M) { - // Free memory... - delete IL; - delete TD; - delete Mang; - delete TCtx; - delete TAsm; - delete MRI; - delete MOFI; - FPConstantMap.clear(); - ByValParams.clear(); - intrinsicPrototypesAlreadyGenerated.clear(); - UnnamedStructIDs.clear(); - return false; - } - - raw_ostream &printType(raw_ostream &Out, Type *Ty, - bool isSigned = false, - const std::string &VariableName = "", - bool IgnoreName = false, - const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty, - bool isSigned, - const std::string &NameSoFar = ""); - - void printStructReturnPointerFunctionType(raw_ostream &Out, - const AttrListPtr &PAL, - PointerType *Ty); - - std::string getStructName(StructType *ST); - - /// writeOperandDeref - Print the result of dereferencing the specified - /// operand with '*'. This is equivalent to printing '*' then using - /// writeOperand, but avoids excess syntax in some cases. - void writeOperandDeref(Value *Operand) { - if (isAddressExposed(Operand)) { - // Already something with an address exposed. - writeOperandInternal(Operand); - } else { - Out << "*("; - writeOperand(Operand); - Out << ")"; - } - } - - void writeOperand(Value *Operand, bool Static = false); - void writeInstComputationInline(Instruction &I); - void writeOperandInternal(Value *Operand, bool Static = false); - void writeOperandWithCast(Value* Operand, unsigned Opcode); - void writeOperandWithCast(Value* Operand, const ICmpInst &I); - bool writeInstructionCast(const Instruction &I); - - void writeMemoryAccess(Value *Operand, Type *OperandType, - bool IsVolatile, unsigned Alignment); - - private : - std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c); - - void lowerIntrinsics(Function &F); - /// Prints the definition of the intrinsic function F. Supports the - /// intrinsics which need to be explicitly defined in the CBackend. - void printIntrinsicDefinition(const Function &F, raw_ostream &Out); - - void printModuleTypes(); - void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &); - void printFloatingPointConstants(Function &F); - void printFloatingPointConstants(const Constant *C); - void printFunctionSignature(const Function *F, bool Prototype); - - void printFunction(Function &); - void printBasicBlock(BasicBlock *BB); - void printLoop(Loop *L); - - void printCast(unsigned opcode, Type *SrcTy, Type *DstTy); - void printConstant(Constant *CPV, bool Static); - void printConstantWithCast(Constant *CPV, unsigned Opcode); - bool printConstExprCast(const ConstantExpr *CE, bool Static); - void printConstantArray(ConstantArray *CPA, bool Static); - void printConstantVector(ConstantVector *CV, bool Static); - void printConstantDataSequential(ConstantDataSequential *CDS, bool Static); - - - /// isAddressExposed - Return true if the specified value's name needs to - /// have its address taken in order to get a C value of the correct type. - /// This happens for global variables, byval parameters, and direct allocas. 
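
// A toy restatement (hypothetical helper, not CWriter's API) of the rule
// isAddressExposed feeds into writeOperand: a value emitted as a plain C
// object names the storage itself, so the IR-level pointer to it must be
// spelled with an explicit address-of.
#include <string>

std::string pointerSpelling(const std::string &Name, bool AddressExposed) {
  return AddressExposed ? "(&" + Name + ")" : Name;
}
// e.g. a global @g becomes the C object g, and an i32* operand naming it
// is printed as (&g).
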
- bool isAddressExposed(const Value *V) const { - if (const Argument *A = dyn_cast<Argument>(V)) - return ByValParams.count(A); - return isa<GlobalVariable>(V) || isDirectAlloca(V); - } - - // isInlinableInst - Attempt to inline instructions into their uses to build - // trees as much as possible. To do this, we have to consistently decide - // what is acceptable to inline, so that variable declarations don't get - // printed and an extra copy of the expr is not emitted. - // - static bool isInlinableInst(const Instruction &I) { - // Always inline cmp instructions, even if they are shared by multiple - // expressions. GCC generates horrible code if we don't. - if (isa<CmpInst>(I)) - return true; - - // Must be an expression, must be used exactly once. If it is dead, we - // emit it inline where it would go. - if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() || - isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) || - isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) || - isa<InsertValueInst>(I)) - // Don't inline a load across a store or other bad things! - return false; - - // Must not be used in inline asm, extractelement, or shufflevector. - if (I.hasOneUse()) { - const Instruction &User = cast<Instruction>(*I.use_back()); - if (isInlineAsm(User) || isa<ExtractElementInst>(User) || - isa<ShuffleVectorInst>(User)) - return false; - } - - // Only inline instruction it if it's use is in the same BB as the inst. - return I.getParent() == cast<Instruction>(I.use_back())->getParent(); - } - - // isDirectAlloca - Define fixed sized allocas in the entry block as direct - // variables which are accessed with the & operator. This causes GCC to - // generate significantly better code than to emit alloca calls directly. - // - static const AllocaInst *isDirectAlloca(const Value *V) { - const AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI) return 0; - if (AI->isArrayAllocation()) - return 0; // FIXME: we can also inline fixed size array allocas! - if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) - return 0; - return AI; - } - - // isInlineAsm - Check if the instruction is a call to an inline asm chunk. 
- static bool isInlineAsm(const Instruction& I) { - if (const CallInst *CI = dyn_cast<CallInst>(&I)) - return isa<InlineAsm>(CI->getCalledValue()); - return false; - } - - // Instruction visitation functions - friend class InstVisitor<CWriter>; - - void visitReturnInst(ReturnInst &I); - void visitBranchInst(BranchInst &I); - void visitSwitchInst(SwitchInst &I); - void visitIndirectBrInst(IndirectBrInst &I); - void visitInvokeInst(InvokeInst &I) { - llvm_unreachable("Lowerinvoke pass didn't work!"); - } - void visitResumeInst(ResumeInst &I) { - llvm_unreachable("DwarfEHPrepare pass didn't work!"); - } - void visitUnreachableInst(UnreachableInst &I); - - void visitPHINode(PHINode &I); - void visitBinaryOperator(Instruction &I); - void visitICmpInst(ICmpInst &I); - void visitFCmpInst(FCmpInst &I); - - void visitCastInst (CastInst &I); - void visitSelectInst(SelectInst &I); - void visitCallInst (CallInst &I); - void visitInlineAsm(CallInst &I); - bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee); - - void visitAllocaInst(AllocaInst &I); - void visitLoadInst (LoadInst &I); - void visitStoreInst (StoreInst &I); - void visitGetElementPtrInst(GetElementPtrInst &I); - void visitVAArgInst (VAArgInst &I); - - void visitInsertElementInst(InsertElementInst &I); - void visitExtractElementInst(ExtractElementInst &I); - void visitShuffleVectorInst(ShuffleVectorInst &SVI); - - void visitInsertValueInst(InsertValueInst &I); - void visitExtractValueInst(ExtractValueInst &I); - - void visitInstruction(Instruction &I) { -#ifndef NDEBUG - errs() << "C Writer does not know about " << I; -#endif - llvm_unreachable(0); - } - - void outputLValue(Instruction *I) { - Out << " " << GetValueName(I) << " = "; - } - - bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To); - void printPHICopiesForSuccessor(BasicBlock *CurBlock, - BasicBlock *Successor, unsigned Indent); - void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock, - unsigned Indent); - void printGEPExpression(Value *Ptr, gep_type_iterator I, - gep_type_iterator E, bool Static); - - std::string GetValueName(const Value *Operand); - }; -} - -char CWriter::ID = 0; - - - -static std::string CBEMangle(const std::string &S) { - std::string Result; - - for (unsigned i = 0, e = S.size(); i != e; ++i) - if (isalnum(S[i]) || S[i] == '_') { - Result += S[i]; - } else { - Result += '_'; - Result += 'A'+(S[i]&15); - Result += 'A'+((S[i]>>4)&15); - Result += '_'; - } - return Result; -} - -std::string CWriter::getStructName(StructType *ST) { - if (!ST->isLiteral() && !ST->getName().empty()) - return CBEMangle("l_"+ST->getName().str()); - - return "l_unnamed_" + utostr(UnnamedStructIDs[ST]); -} - - -/// printStructReturnPointerFunctionType - This is like printType for a struct -/// return type, except, instead of printing the type as void (*)(Struct*, ...) -/// print it as "Struct (*)(...)", for struct return functions. 
-void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, - const AttrListPtr &PAL, - PointerType *TheTy) { - FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - FunctionInnards << " (*) ("; - bool PrintedType = false; - - FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); - Type *RetTy = cast<PointerType>(*I)->getElementType(); - unsigned Idx = 1; - for (++I, ++Idx; I != E; ++I, ++Idx) { - if (PrintedType) - FunctionInnards << ", "; - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - PrintedType = true; - } - if (FTy->isVarArg()) { - if (!PrintedType) - FunctionInnards << " int"; //dummy argument for empty vararg functs - FunctionInnards << ", ..."; - } else if (!PrintedType) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); -} - -raw_ostream & -CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned, - const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && - "Invalid type for printSimpleType"); - switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void " << NameSoFar; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool " << NameSoFar; - else if (NumBits <= 8) - return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; - else if (NumBits <= 16) - return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; - else if (NumBits <= 32) - return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; - else if (NumBits <= 64) - return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; - } - } - case Type::FloatTyID: return Out << "float " << NameSoFar; - case Type::DoubleTyID: return Out << "double " << NameSoFar; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double " << NameSoFar; - - case Type::X86_MMXTyID: - return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned, - " __attribute__((vector_size(64))) " + NameSoFar); - - case Type::VectorTyID: { - VectorType *VTy = cast<VectorType>(Ty); - return printSimpleType(Out, VTy->getElementType(), isSigned, - " __attribute__((vector_size(" + - utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); - } - - default: -#ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; -#endif - llvm_unreachable(0); - } -} - -// Pass the Type* and the variable name and this prints out the variable -// declaration. 
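
// The width ladder printSimpleType walks, restated as a standalone sketch:
// only i1..i64 map onto portable C integer types; wider types fall back to
// the llvmInt128/llvmUInt128 typedefs the emitted preamble provides for
// GCC on LP64 targets.
#include <string>

std::string cIntTypeFor(unsigned NumBits, bool IsSigned) {
  if (NumBits == 1)  return "bool";
  std::string Sign = IsSigned ? "signed" : "unsigned";
  if (NumBits <= 8)  return Sign + " char";
  if (NumBits <= 16) return Sign + " short";
  if (NumBits <= 32) return Sign + " int";
  if (NumBits <= 64) return Sign + " long long";
  return IsSigned ? "llvmInt128" : "llvmUInt128";  // needs the emitted typedef
}
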
-// -raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty, - bool isSigned, const std::string &NameSoFar, - bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { - printSimpleType(Out, Ty, isSigned, NameSoFar); - return Out; - } - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - FunctionType *FTy = cast<FunctionType>(Ty); - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - FunctionInnards << " (" << NameSoFar << ") ("; - unsigned Idx = 1; - for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I) { - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - if (I != FTy->param_begin()) - FunctionInnards << ", "; - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - ++Idx; - } - if (FTy->isVarArg()) { - if (!FTy->getNumParams()) - FunctionInnards << " int"; //dummy argument for empty vaarg functs - FunctionInnards << ", ..."; - } else if (!FTy->getNumParams()) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); - return Out; - } - case Type::StructTyID: { - StructType *STy = cast<StructType>(Ty); - - // Check to see if the type is named. - if (!IgnoreName) - return Out << getStructName(STy) << ' ' << NameSoFar; - - Out << NameSoFar + " {\n"; - unsigned Idx = 0; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - Out << " "; - printType(Out, *I, false, "field" + utostr(Idx++)); - Out << ";\n"; - } - Out << '}'; - if (STy->isPacked()) - Out << " __attribute__ ((packed))"; - return Out; - } - - case Type::PointerTyID: { - PointerType *PTy = cast<PointerType>(Ty); - std::string ptrName = "*" + NameSoFar; - - if (PTy->getElementType()->isArrayTy() || - PTy->getElementType()->isVectorTy()) - ptrName = "(" + ptrName + ")"; - - if (!PAL.isEmpty()) - // Must be a function ptr cast! - return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); - return printType(Out, PTy->getElementType(), false, ptrName); - } - - case Type::ArrayTyID: { - ArrayType *ATy = cast<ArrayType>(Ty); - unsigned NumElements = ATy->getNumElements(); - if (NumElements == 0) NumElements = 1; - // Arrays are wrapped in structs to allow them to have normal - // value semantics (avoiding the array "decay"). 
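
// Illustrative only (the real struct name comes from getStructName): the C
// shape this case emits for a [4 x i32], and why the wrapper is worth it --
// the struct has value semantics, so whole-array copy, assignment, and
// return all work where a bare C array would decay to a pointer.
struct l_array_4_uint { unsigned int array[4]; };

l_array_4_uint copyWhole(l_array_4_uint V) { return V; }  // copies all four
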
- Out << NameSoFar << " { "; - printType(Out, ATy->getElementType(), false, - "array[" + utostr(NumElements) + "]"); - return Out << "; }"; - } - - default: - llvm_unreachable("Unhandled case in getTypeProps!"); - } -} - -void CWriter::printConstantArray(ConstantArray *CPA, bool Static) { - Out << "{ "; - printConstant(cast<Constant>(CPA->getOperand(0)), Static); - for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CPA->getOperand(i)), Static); - } - Out << " }"; -} - -void CWriter::printConstantVector(ConstantVector *CP, bool Static) { - Out << "{ "; - printConstant(cast<Constant>(CP->getOperand(0)), Static); - for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CP->getOperand(i)), Static); - } - Out << " }"; -} - -void CWriter::printConstantDataSequential(ConstantDataSequential *CDS, - bool Static) { - // As a special case, print the array as a string if it is an array of - // ubytes or an array of sbytes with positive values. - // - if (CDS->isCString()) { - Out << '\"'; - // Keep track of whether the last number was a hexadecimal escape. - bool LastWasHex = false; - - StringRef Bytes = CDS->getAsCString(); - - // Do not include the last character, which we know is null - for (unsigned i = 0, e = Bytes.size(); i != e; ++i) { - unsigned char C = Bytes[i]; - - // Print it out literally if it is a printable character. The only thing - // to be careful about is when the last letter output was a hex escape - // code, in which case we have to be careful not to print out hex digits - // explicitly (the C compiler thinks it is a continuation of the previous - // character, sheesh...) - // - if (isprint(C) && (!LastWasHex || !isxdigit(C))) { - LastWasHex = false; - if (C == '"' || C == '\\') - Out << "\\" << (char)C; - else - Out << (char)C; - } else { - LastWasHex = false; - switch (C) { - case '\n': Out << "\\n"; break; - case '\t': Out << "\\t"; break; - case '\r': Out << "\\r"; break; - case '\v': Out << "\\v"; break; - case '\a': Out << "\\a"; break; - case '\"': Out << "\\\""; break; - case '\'': Out << "\\\'"; break; - default: - Out << "\\x"; - Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')); - Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - LastWasHex = true; - break; - } - } - } - Out << '\"'; - } else { - Out << "{ "; - printConstant(CDS->getElementAsConstant(0), Static); - for (unsigned i = 1, e = CDS->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CDS->getElementAsConstant(i), Static); - } - Out << " }"; - } -} - - -// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out -// textually as a double (rather than as a reference to a stack-allocated -// variable). We decide this by converting CFP to a string and back into a -// double, and then checking whether the conversion results in a bit-equal -// double to the original value of CFP. This depends on us and the target C -// compiler agreeing on the conversion process (which is pretty likely since we -// only deal in IEEE FP). -// -static bool isFPCSafeToPrint(const ConstantFP *CFP) { - bool ignored; - // Do long doubles in hex for now. 
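
// The same safety test in miniature, assuming plain double and the %a
// path: print the value, reparse it, and only call it printable if the
// round trip is bit-identical. A plain == comparison would be wrong here:
// it ignores the sign of zero and always fails for NaN.
#include <cstdio>
#include <cstdlib>
#include <cstring>

bool safeToPrintTextually(double V) {
  char Buf[64];
  std::snprintf(Buf, sizeof Buf, "%a", V);        // C99 hex-float output
  double Back = std::strtod(Buf, nullptr);        // C99 hex-float input
  return std::memcmp(&Back, &V, sizeof V) == 0;   // bitwise, like APFloat
}
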
- if (CFP->getType() != Type::getFloatTy(CFP->getContext()) && - CFP->getType() != Type::getDoubleTy(CFP->getContext())) - return false; - APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); -#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A - char Buffer[100]; - sprintf(Buffer, "%a", APF.convertToDouble()); - if (!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || - !strncmp(Buffer, "+0x", 3)) - return APF.bitwiseIsEqual(APFloat(atof(Buffer))); - return false; -#else - std::string StrVal = ftostr(APF); - - while (StrVal[0] == ' ') - StrVal.erase(StrVal.begin()); - - // Check to make sure that the stringized number is not some string like "Inf" - // or NaN. Check that the string matches the "[-+]?[0-9]" regex. - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) - // Reparse stringized version! - return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str()))); - return false; -#endif -} - -/// Print out the casting for a cast operation. This does the double casting -/// necessary for conversion to the destination type, if necessary. -/// @brief Print a cast -void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) { - // Print the destination type cast - switch (opc) { - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::IntToPtr: - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: // For these the DstTy sign doesn't matter - Out << '('; - printType(Out, DstTy); - Out << ')'; - break; - case Instruction::ZExt: - case Instruction::PtrToInt: - case Instruction::FPToUI: // For these, make sure we get an unsigned dest - Out << '('; - printSimpleType(Out, DstTy, false); - Out << ')'; - break; - case Instruction::SExt: - case Instruction::FPToSI: // For these, make sure we get a signed dest - Out << '('; - printSimpleType(Out, DstTy, true); - Out << ')'; - break; - default: - llvm_unreachable("Invalid cast opcode"); - } - - // Print the source type cast - switch (opc) { - case Instruction::UIToFP: - case Instruction::ZExt: - Out << '('; - printSimpleType(Out, SrcTy, false); - Out << ')'; - break; - case Instruction::SIToFP: - case Instruction::SExt: - Out << '('; - printSimpleType(Out, SrcTy, true); - Out << ')'; - break; - case Instruction::IntToPtr: - case Instruction::PtrToInt: - // Avoid "cast to pointer from integer of different size" warnings - Out << "(unsigned long)"; - break; - case Instruction::Trunc: - case Instruction::BitCast: - case Instruction::FPExt: - case Instruction::FPTrunc: - case Instruction::FPToSI: - case Instruction::FPToUI: - break; // These don't need a source cast. - default: - llvm_unreachable("Invalid cast opcode"); - } -} - -// printConstant - The LLVM Constant to C Constant converter. 
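
// The double-cast pattern printCast produces, as a compilable sketch: the
// inner cast pins down the signedness of the source and the outer cast
// forms the destination, e.g. "((signed int)(signed char)x)" for a sign
// extension and "((unsigned int)(unsigned char)x)" for a zero extension.
int sextI8ToI32(unsigned char X) {
  return (signed int)(signed char)X;       // inner cast: signed source
}
unsigned zextI8ToI32(signed char X) {
  return (unsigned int)(unsigned char)X;   // inner cast: unsigned source
}
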
-void CWriter::printConstant(Constant *CPV, bool Static) { - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) { - switch (CE->getOpcode()) { - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Out << "("; - printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType()); - if (CE->getOpcode() == Instruction::SExt && - CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) { - // Make sure we really sext from bool here by subtracting from 0 - Out << "0-"; - } - printConstant(CE->getOperand(0), Static); - if (CE->getType() == Type::getInt1Ty(CPV->getContext()) && - (CE->getOpcode() == Instruction::Trunc || - CE->getOpcode() == Instruction::FPToUI || - CE->getOpcode() == Instruction::FPToSI || - CE->getOpcode() == Instruction::PtrToInt)) { - // Make sure we really truncate to bool here by anding with 1 - Out << "&1u"; - } - Out << ')'; - return; - - case Instruction::GetElementPtr: - Out << "("; - printGEPExpression(CE->getOperand(0), gep_type_begin(CPV), - gep_type_end(CPV), Static); - Out << ")"; - return; - case Instruction::Select: - Out << '('; - printConstant(CE->getOperand(0), Static); - Out << '?'; - printConstant(CE->getOperand(1), Static); - Out << ':'; - printConstant(CE->getOperand(2), Static); - Out << ')'; - return; - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::ICmp: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); - printConstantWithCast(CE->getOperand(0), CE->getOpcode()); - switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl: Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - case Instruction::ICmp: - switch (CE->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_ULT: Out << " < "; break; - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_ULE: Out << " <= "; break; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_UGT: Out << " > "; break; - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGE: Out << " >= "; break; - default: llvm_unreachable("Illegal ICmp predicate"); - } - break; - default: llvm_unreachable("Illegal opcode here!"); - } - printConstantWithCast(CE->getOperand(1), CE->getOpcode()); - if 
(NeedsClosingParens) - Out << "))"; - Out << ')'; - return; - } - case Instruction::FCmp: { - Out << '('; - bool NeedsClosingParens = printConstExprCast(CE, Static); - if (CE->getPredicate() == FCmpInst::FCMP_FALSE) - Out << "0"; - else if (CE->getPredicate() == FCmpInst::FCMP_TRUE) - Out << "1"; - else { - const char* op = 0; - switch (CE->getPredicate()) { - default: llvm_unreachable("Illegal FCmp predicate"); - case FCmpInst::FCMP_ORD: op = "ord"; break; - case FCmpInst::FCMP_UNO: op = "uno"; break; - case FCmpInst::FCMP_UEQ: op = "ueq"; break; - case FCmpInst::FCMP_UNE: op = "une"; break; - case FCmpInst::FCMP_ULT: op = "ult"; break; - case FCmpInst::FCMP_ULE: op = "ule"; break; - case FCmpInst::FCMP_UGT: op = "ugt"; break; - case FCmpInst::FCMP_UGE: op = "uge"; break; - case FCmpInst::FCMP_OEQ: op = "oeq"; break; - case FCmpInst::FCMP_ONE: op = "one"; break; - case FCmpInst::FCMP_OLT: op = "olt"; break; - case FCmpInst::FCMP_OLE: op = "ole"; break; - case FCmpInst::FCMP_OGT: op = "ogt"; break; - case FCmpInst::FCMP_OGE: op = "oge"; break; - } - Out << "llvm_fcmp_" << op << "("; - printConstantWithCast(CE->getOperand(0), CE->getOpcode()); - Out << ", "; - printConstantWithCast(CE->getOperand(1), CE->getOpcode()); - Out << ")"; - } - if (NeedsClosingParens) - Out << "))"; - Out << ')'; - return; - } - default: -#ifndef NDEBUG - errs() << "CWriter Error: Unhandled constant expression: " - << *CE << "\n"; -#endif - llvm_unreachable(0); - } - } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) { - Out << "(("; - printType(Out, CPV->getType()); // sign doesn't matter - Out << ")/*UNDEF*/"; - if (!CPV->getType()->isVectorTy()) { - Out << "0)"; - } else { - Out << "{})"; - } - return; - } - - if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { - Type* Ty = CI->getType(); - if (Ty == Type::getInt1Ty(CPV->getContext())) - Out << (CI->getZExtValue() ? '1' : '0'); - else if (Ty == Type::getInt32Ty(CPV->getContext())) - Out << CI->getZExtValue() << 'u'; - else if (Ty->getPrimitiveSizeInBits() > 32) - Out << CI->getZExtValue() << "ull"; - else { - Out << "(("; - printSimpleType(Out, Ty, false) << ')'; - if (CI->isMinValue(true)) - Out << CI->getZExtValue() << 'u'; - else - Out << CI->getSExtValue(); - Out << ')'; - } - return; - } - - switch (CPV->getType()->getTypeID()) { - case Type::FloatTyID: - case Type::DoubleTyID: - case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: { - ConstantFP *FPC = cast<ConstantFP>(CPV); - std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC); - if (I != FPConstantMap.end()) { - // Because of FP precision problems we must load from a stack allocated - // value that holds the value in hex. - Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ? - "float" : - FPC->getType() == Type::getDoubleTy(CPV->getContext()) ? - "double" : - "long double") - << "*)&FPConstant" << I->second << ')'; - } else { - double V; - if (FPC->getType() == Type::getFloatTy(CPV->getContext())) - V = FPC->getValueAPF().convertToFloat(); - else if (FPC->getType() == Type::getDoubleTy(CPV->getContext())) - V = FPC->getValueAPF().convertToDouble(); - else { - // Long double. Convert the number to double, discarding precision. - // This is not awesome, but it at least makes the CBE output somewhat - // useful. 
- APFloat Tmp = FPC->getValueAPF(); - bool LosesInfo; - Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo); - V = Tmp.convertToDouble(); - } - - if (IsNAN(V)) { - // The value is NaN - - // FIXME the actual NaN bits should be emitted. - // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN, - // it's 0x7ff4. - const unsigned long QuietNaN = 0x7ff8UL; - //const unsigned long SignalNaN = 0x7ff4UL; - - // We need to grab the first part of the FP # - char Buffer[100]; - - uint64_t ll = DoubleToBits(V); - sprintf(Buffer, "0x%llx", static_cast<long long>(ll)); - - std::string Num(&Buffer[0], &Buffer[6]); - unsigned long Val = strtoul(Num.c_str(), 0, 16); - - if (FPC->getType() == Type::getFloatTy(FPC->getContext())) - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\"" - << Buffer << "\") /*nan*/ "; - else - Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\"" - << Buffer << "\") /*nan*/ "; - } else if (IsInf(V)) { - // The value is Inf - if (V < 0) Out << '-'; - Out << "LLVM_INF" << - (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "") - << " /*inf*/ "; - } else { - std::string Num; -#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A - // Print out the constant as a floating point number. - char Buffer[100]; - sprintf(Buffer, "%a", V); - Num = Buffer; -#else - Num = ftostr(FPC->getValueAPF()); -#endif - Out << Num; - } - } - break; - } - - case Type::ArrayTyID: - // Use C99 compound expression literal initializer syntax. - if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - Out << "{ "; // Arrays are wrapped in struct types. - if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) { - printConstantArray(CA, Static); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Static); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - ArrayType *AT = cast<ArrayType>(CPV->getType()); - Out << '{'; - if (AT->getNumElements()) { - Out << ' '; - Constant *CZ = Constant::getNullValue(AT->getElementType()); - printConstant(CZ, Static); - for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Static); - } - } - Out << " }"; - } - Out << " }"; // Arrays are wrapped in struct types. - break; - - case Type::VectorTyID: - // Use C99 compound expression literal initializer syntax. - if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) { - printConstantVector(CV, Static); - } else if (ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { - printConstantDataSequential(CDS, Static); - } else { - assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)); - VectorType *VT = cast<VectorType>(CPV->getType()); - Out << "{ "; - Constant *CZ = Constant::getNullValue(VT->getElementType()); - printConstant(CZ, Static); - for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(CZ, Static); - } - Out << " }"; - } - break; - - case Type::StructTyID: - // Use C99 compound expression literal initializer syntax. 
- if (!Static) { - Out << "("; - printType(Out, CPV->getType()); - Out << ")"; - } - if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) { - StructType *ST = cast<StructType>(CPV->getType()); - Out << '{'; - if (ST->getNumElements()) { - Out << ' '; - printConstant(Constant::getNullValue(ST->getElementType(0)), Static); - for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) { - Out << ", "; - printConstant(Constant::getNullValue(ST->getElementType(i)), Static); - } - } - Out << " }"; - } else { - Out << '{'; - if (CPV->getNumOperands()) { - Out << ' '; - printConstant(cast<Constant>(CPV->getOperand(0)), Static); - for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) { - Out << ", "; - printConstant(cast<Constant>(CPV->getOperand(i)), Static); - } - } - Out << " }"; - } - break; - - case Type::PointerTyID: - if (isa<ConstantPointerNull>(CPV)) { - Out << "(("; - printType(Out, CPV->getType()); // sign doesn't matter - Out << ")/*NULL*/0)"; - break; - } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) { - writeOperand(GV, Static); - break; - } - // FALL THROUGH - default: -#ifndef NDEBUG - errs() << "Unknown constant type: " << *CPV << "\n"; -#endif - llvm_unreachable(0); - } -} - -// Some constant expressions need to be casted back to the original types -// because their operands were casted to the expected type. This function takes -// care of detecting that case and printing the cast for the ConstantExpr. -bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { - bool NeedsExplicitCast = false; - Type *Ty = CE->getOperand(0)->getType(); - bool TypeIsSigned = false; - switch (CE->getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: NeedsExplicitCast = true; break; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break; - case Instruction::SExt: - Ty = CE->getType(); - NeedsExplicitCast = true; - TypeIsSigned = true; - break; - case Instruction::ZExt: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: - Ty = CE->getType(); - NeedsExplicitCast = true; - break; - default: break; - } - if (NeedsExplicitCast) { - Out << "(("; - if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) - printSimpleType(Out, Ty, TypeIsSigned); - else - printType(Out, Ty); // not integer, sign doesn't matter - Out << ")("; - } - return NeedsExplicitCast; -} - -// Print a constant assuming that it is the operand for a given Opcode. The -// opcodes that care about sign need to cast their operands to the expected -// type before the operation proceeds. This function does the casting. -void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { - - // Extract the operand's type, we'll need it. - Type* OpTy = CPV->getType(); - - // Indicate whether to do the cast or not. - bool shouldCast = false; - bool typeIsSigned = false; - - // Based on the Opcode for which this Constant is being written, determine - // the new type to which the operand should be casted by setting the value - // of OpTy. 
If we change OpTy, also set shouldCast to true so it gets - // casted below. - switch (Opcode) { - default: - // for most instructions, it doesn't matter - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: - shouldCast = true; - break; - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: - shouldCast = true; - typeIsSigned = true; - break; - } - - // Write out the casted constant if we should, otherwise just write the - // operand. - if (shouldCast) { - Out << "(("; - printSimpleType(Out, OpTy, typeIsSigned); - Out << ")"; - printConstant(CPV, false); - Out << ")"; - } else - printConstant(CPV, false); -} - -std::string CWriter::GetValueName(const Value *Operand) { - - // Resolve potential alias. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) { - if (const Value *V = GA->resolveAliasedGlobal(false)) - Operand = V; - } - - // Mangle globals with the standard mangler interface for LLC compatibility. - if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) { - SmallString<128> Str; - Mang->getNameWithPrefix(Str, GV, false); - return CBEMangle(Str.str().str()); - } - - std::string Name = Operand->getName(); - - if (Name.empty()) { // Assign unique names to local temporaries. - unsigned &No = AnonValueNumbers[Operand]; - if (No == 0) - No = ++NextAnonValueNumber; - Name = "tmp__" + utostr(No); - } - - std::string VarName; - VarName.reserve(Name.capacity()); - - for (std::string::iterator I = Name.begin(), E = Name.end(); - I != E; ++I) { - char ch = *I; - - if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (ch >= '0' && ch <= '9') || ch == '_')) { - char buffer[5]; - sprintf(buffer, "_%x_", ch); - VarName += buffer; - } else - VarName += ch; - } - - return "llvm_cbe_" + VarName; -} - -/// writeInstComputationInline - Emit the computation for the specified -/// instruction inline, with no destination provided. -void CWriter::writeInstComputationInline(Instruction &I) { - // We can't currently support integer types other than 1, 8, 16, 32, 64. - // Validate this. - Type *Ty = I.getType(); - if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && - Ty!=Type::getInt8Ty(I.getContext()) && - Ty!=Type::getInt16Ty(I.getContext()) && - Ty!=Type::getInt32Ty(I.getContext()) && - Ty!=Type::getInt64Ty(I.getContext()))) { - report_fatal_error("The C backend does not currently support integer " - "types of widths other than 1, 8, 16, 32, 64.\n" - "This is being tracked as PR 4158."); - } - - // If this is a non-trivial bool computation, make sure to truncate down to - // a 1 bit value. This is important because we want "add i1 x, y" to return - // "0" when x and y are true, not "2" for example. - bool NeedBoolTrunc = false; - if (I.getType() == Type::getInt1Ty(I.getContext()) && - !isa<ICmpInst>(I) && !isa<FCmpInst>(I)) - NeedBoolTrunc = true; - - if (NeedBoolTrunc) - Out << "(("; - - visit(I); - - if (NeedBoolTrunc) - Out << ")&1)"; -} - - -void CWriter::writeOperandInternal(Value *Operand, bool Static) { - if (Instruction *I = dyn_cast<Instruction>(Operand)) - // Should we inline this instruction to build a tree? 
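
// GetValueName's identifier cleanup as a self-contained sketch: anything
// outside [A-Za-z0-9_] becomes an _XX_ hex escape, so every emitted name
// is a valid C identifier under the llvm_cbe_ prefix.
#include <cstdio>
#include <string>

std::string sanitizeForC(const std::string &Name) {
  std::string Clean;
  for (unsigned char Ch : Name) {
    if ((Ch >= 'a' && Ch <= 'z') || (Ch >= 'A' && Ch <= 'Z') ||
        (Ch >= '0' && Ch <= '9') || Ch == '_') {
      Clean += static_cast<char>(Ch);
    } else {
      char Buf[8];
      std::snprintf(Buf, sizeof Buf, "_%x_", Ch);
      Clean += Buf;
    }
  }
  return "llvm_cbe_" + Clean;
}
// sanitizeForC("a.b") == "llvm_cbe_a_2e_b"
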
- if (isInlinableInst(*I) && !isDirectAlloca(I)) { - Out << '('; - writeInstComputationInline(*I); - Out << ')'; - return; - } - - Constant* CPV = dyn_cast<Constant>(Operand); - - if (CPV && !isa<GlobalValue>(CPV)) - printConstant(CPV, Static); - else - Out << GetValueName(Operand); -} - -void CWriter::writeOperand(Value *Operand, bool Static) { - bool isAddressImplicit = isAddressExposed(Operand); - if (isAddressImplicit) - Out << "(&"; // Global variables are referenced as their addresses by llvm - - writeOperandInternal(Operand, Static); - - if (isAddressImplicit) - Out << ')'; -} - -// Some instructions need to have their result value casted back to the -// original types because their operands were casted to the expected type. -// This function takes care of detecting that case and printing the cast -// for the Instruction. -bool CWriter::writeInstructionCast(const Instruction &I) { - Type *Ty = I.getOperand(0)->getType(); - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::URem: - case Instruction::UDiv: - Out << "(("; - printSimpleType(Out, Ty, false); - Out << ")("; - return true; - case Instruction::AShr: - case Instruction::SRem: - case Instruction::SDiv: - Out << "(("; - printSimpleType(Out, Ty, true); - Out << ")("; - return true; - default: break; - } - return false; -} - -// Write the operand with a cast to another type based on the Opcode being used. -// This will be used in cases where an instruction has specific type -// requirements (usually signedness) for its operands. -void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { - - // Extract the operand's type, we'll need it. - Type* OpTy = Operand->getType(); - - // Indicate whether to do the cast or not. - bool shouldCast = false; - - // Indicate whether the cast should be to a signed type or not. - bool castIsSigned = false; - - // Based on the Opcode for which this Operand is being written, determine - // the new type to which the operand should be casted by setting the value - // of OpTy. If we change OpTy, also set shouldCast to true. - switch (Opcode) { - default: - // for most instructions, it doesn't matter - break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: - // We need to cast integer arithmetic so that it is always performed - // as unsigned, to avoid undefined behavior on overflow. - case Instruction::LShr: - case Instruction::UDiv: - case Instruction::URem: // Cast to unsigned first - shouldCast = true; - castIsSigned = false; - break; - case Instruction::GetElementPtr: - case Instruction::AShr: - case Instruction::SDiv: - case Instruction::SRem: // Cast to signed first - shouldCast = true; - castIsSigned = true; - break; - } - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (shouldCast) { - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; - } else - writeOperand(Operand); -} - -// Write the operand with a cast to another type based on the icmp predicate -// being used. -void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { - // This has to do a cast to ensure the operand has the right signedness. 
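
// Why the casts to unsigned matter, in one compilable line: signed
// overflow is undefined behavior in C, while LLVM's add/sub/mul are
// defined to wrap in two's complement, so the backend must route the
// arithmetic through unsigned and convert back.
#include <cstdint>

int32_t wrappingAdd(int32_t A, int32_t B) {
  // Unsigned addition wraps mod 2^32; the conversion back reinterprets
  // the bits as two's complement on the targets LLVM cares about.
  return (int32_t)((uint32_t)A + (uint32_t)B);
}
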
- // Also, if the operand is a pointer, we make sure to cast to an integer when - // doing the comparison both for signedness and so that the C compiler doesn't - // optimize things like "p < NULL" to false (p may contain an integer value - // f.e.). - bool shouldCast = Cmp.isRelational(); - - // Write out the casted operand if we should, otherwise just write the - // operand. - if (!shouldCast) { - writeOperand(Operand); - return; - } - - // Should this be a signed comparison? If so, convert to signed. - bool castIsSigned = Cmp.isSigned(); - - // If the operand was a pointer, convert to a large integer type. - Type* OpTy = Operand->getType(); - if (OpTy->isPointerTy()) - OpTy = TD->getIntPtrType(Operand->getContext()); - - Out << "(("; - printSimpleType(Out, OpTy, castIsSigned); - Out << ")"; - writeOperand(Operand); - Out << ")"; -} - -// generateCompilerSpecificCode - This is where we add conditional compilation -// directives to cater to specific compilers as need be. -// -static void generateCompilerSpecificCode(formatted_raw_ostream& Out, - const TargetData *TD) { - // Alloca is hard to get, and we don't want to include stdlib.h here. - Out << "/* get a declaration for alloca */\n" - << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n" - << "#define alloca(x) __builtin_alloca((x))\n" - << "#define _alloca(x) __builtin_alloca((x))\n" - << "#elif defined(__APPLE__)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#define longjmp _longjmp\n" - << "#define setjmp _setjmp\n" - << "#elif defined(__sun__)\n" - << "#if defined(__sparcv9)\n" - << "extern void *__builtin_alloca(unsigned long);\n" - << "#else\n" - << "extern void *__builtin_alloca(unsigned int);\n" - << "#endif\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n" - << "#define alloca(x) __builtin_alloca(x)\n" - << "#elif defined(_MSC_VER)\n" - << "#define inline _inline\n" - << "#define alloca(x) _alloca(x)\n" - << "#else\n" - << "#include <alloca.h>\n" - << "#endif\n\n"; - - // We output GCC specific attributes to preserve 'linkonce'ness on globals. - // If we aren't being compiled with GCC, just drop these attributes. - Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n" - << "#define __attribute__(X)\n" - << "#endif\n\n"; - - // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))". - Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n" - << "#elif defined(__GNUC__)\n" - << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __EXTERNAL_WEAK__\n" - << "#endif\n\n"; - - // For now, turn off the weak linkage attribute on Mac OS X. (See above.) - Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#elif defined(__GNUC__)\n" - << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n" - << "#else\n" - << "#define __ATTRIBUTE_WEAK__\n" - << "#endif\n\n"; - - // Add hidden visibility support. FIXME: APPLE_CC? - Out << "#if defined(__GNUC__)\n" - << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n" - << "#endif\n\n"; - - // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise - // From the GCC documentation: - // - // double __builtin_nan (const char *str) - // - // This is an implementation of the ISO C99 function nan. 
- // - // Since ISO C99 defines this function in terms of strtod, which we do - // not implement, a description of the parsing is in order. The string is - // parsed as by strtol; that is, the base is recognized by leading 0 or - // 0x prefixes. The number parsed is placed in the significand such that - // the least significant bit of the number is at the least significant - // bit of the significand. The number is truncated to fit the significand - // field provided. The significand is forced to be a quiet NaN. - // - // This function, if given a string literal, is evaluated early enough - // that it is considered a compile-time constant. - // - // float __builtin_nanf (const char *str) - // - // Similar to __builtin_nan, except the return type is float. - // - // double __builtin_inf (void) - // - // Similar to __builtin_huge_val, except a warning is generated if the - // target floating-point format does not support infinities. This - // function is suitable for implementing the ISO C99 macro INFINITY. - // - // float __builtin_inff (void) - // - // Similar to __builtin_inf, except the return type is float. - Out << "#ifdef __GNUC__\n" - << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n" - << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n" - << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n" - << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n" - << "#define LLVM_INF __builtin_inf() /* Double */\n" - << "#define LLVM_INFF __builtin_inff() /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) " - "__builtin_prefetch(addr,rw,locality)\n" - << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n" - << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n" - << "#define LLVM_ASM __asm__\n" - << "#else\n" - << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n" - << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n" - << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n" - << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n" - << "#define LLVM_INF ((double)0.0) /* Double */\n" - << "#define LLVM_INFF 0.0F /* Float */\n" - << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n" - << "#define __ATTRIBUTE_CTOR__\n" - << "#define __ATTRIBUTE_DTOR__\n" - << "#define LLVM_ASM(X)\n" - << "#endif\n\n"; - - Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n" - << "#define __builtin_stack_save() 0 /* not implemented */\n" - << "#define __builtin_stack_restore(X) /* noop */\n" - << "#endif\n\n"; - - // Output typedefs for 128-bit integers. If these are needed with a - // 32-bit target or with a C compiler that doesn't support mode(TI), - // more drastic measures will be needed. - Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n" - << "typedef int __attribute__((mode(TI))) llvmInt128;\n" - << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n" - << "#endif\n\n"; - - // Output target-specific code that should be inserted into main. - Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n"; -} - -/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into -/// the StaticTors set. 
-static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){ - ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); - if (!InitList) return; - - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. - - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - Constant *FP = CS->getOperand(1); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP)) - if (CE->isCast()) - FP = CE->getOperand(0); - if (Function *F = dyn_cast<Function>(FP)) - StaticTors.insert(F); - } -} - -enum SpecialGlobalClass { - NotSpecial = 0, - GlobalCtors, GlobalDtors, - NotPrinted -}; - -/// getGlobalVariableClass - If this is a global that is specially recognized -/// by LLVM, return a code that indicates how we should handle it. -static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) { - // If this is a global ctors/dtors list, handle it now. - if (GV->hasAppendingLinkage() && GV->use_empty()) { - if (GV->getName() == "llvm.global_ctors") - return GlobalCtors; - else if (GV->getName() == "llvm.global_dtors") - return GlobalDtors; - } - - // Otherwise, if it is other metadata, don't print it. This catches things - // like debug information. - if (GV->getSection() == "llvm.metadata") - return NotPrinted; - - return NotSpecial; -} - -// PrintEscapedString - Print each character of the specified string, escaping -// it if it is not printable or if it is an escape char. -static void PrintEscapedString(const char *Str, unsigned Length, - raw_ostream &Out) { - for (unsigned i = 0; i != Length; ++i) { - unsigned char C = Str[i]; - if (isprint(C) && C != '\\' && C != '"') - Out << C; - else if (C == '\\') - Out << "\\\\"; - else if (C == '\"') - Out << "\\\""; - else if (C == '\t') - Out << "\\t"; - else - Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F); - } -} - -// PrintEscapedString - Print each character of the specified string, escaping -// it if it is not printable or if it is an escape char. -static void PrintEscapedString(const std::string &Str, raw_ostream &Out) { - PrintEscapedString(Str.c_str(), Str.size(), Out); -} - -bool CWriter::doInitialization(Module &M) { - FunctionPass::doInitialization(M); - - // Initialize - TheModule = &M; - - TD = new TargetData(&M); - IL = new IntrinsicLowering(*TD); - IL->AddPrototypes(M); - -#if 0 - std::string Triple = TheModule->getTargetTriple(); - if (Triple.empty()) - Triple = llvm::sys::getDefaultTargetTriple(); - - std::string E; - if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) - TAsm = Match->createMCAsmInfo(Triple); -#endif - TAsm = new CBEMCAsmInfo(); - MRI = new MCRegisterInfo(); - TCtx = new MCContext(*TAsm, *MRI, NULL); - Mang = new Mangler(*TCtx, *TD); - - // Keep track of which functions are static ctors/dtors so they can have - // an attribute added to their prototypes. 
- std::set<Function*> StaticCtors, StaticDtors; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - switch (getGlobalVariableClass(I)) { - default: break; - case GlobalCtors: - FindStaticTors(I, StaticCtors); - break; - case GlobalDtors: - FindStaticTors(I, StaticDtors); - break; - } - } - - // get declaration for alloca - Out << "/* Provide Declarations */\n"; - Out << "#include <stdarg.h>\n"; // Varargs support - Out << "#include <setjmp.h>\n"; // Unwind support - Out << "#include <limits.h>\n"; // With overflow intrinsics support. - generateCompilerSpecificCode(Out, TD); - - // Provide a definition for `bool' if not compiling with a C++ compiler. - Out << "\n" - << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n" - - << "\n\n/* Support for floating point constants */\n" - << "typedef unsigned long long ConstantDoubleTy;\n" - << "typedef unsigned int ConstantFloatTy;\n" - << "typedef struct { unsigned long long f1; unsigned short f2; " - "unsigned short pad[3]; } ConstantFP80Ty;\n" - // This is used for both kinds of 128-bit long double; meaning differs. - << "typedef struct { unsigned long long f1; unsigned long long f2; }" - " ConstantFP128Ty;\n" - << "\n\n/* Global Declarations */\n"; - - // First output all the declarations for the program, because C requires - // Functions & globals to be declared before they are used. - // - if (!M.getModuleInlineAsm().empty()) { - Out << "/* Module asm statements */\n" - << "asm("; - - // Split the string into lines, to make it easier to read the .ll file. - std::string Asm = M.getModuleInlineAsm(); - size_t CurPos = 0; - size_t NewLine = Asm.find_first_of('\n', CurPos); - while (NewLine != std::string::npos) { - // We found a newline, print the portion of the asm string from the - // last newline up to this newline. - Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), - Out); - Out << "\\n\"\n"; - CurPos = NewLine+1; - NewLine = Asm.find_first_of('\n', CurPos); - } - Out << "\""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); - Out << "\");\n" - << "/* End Module asm statements */\n"; - } - - // Loop over the symbol table, emitting all named constants. - printModuleTypes(); - - // Global variable declarations... - if (!M.global_empty()) { - Out << "\n/* External Global Variable Declarations */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - - if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() || - I->hasCommonLinkage()) - Out << "extern "; - else if (I->hasDLLImportLinkage()) - Out << "__declspec(dllimport) "; - else - continue; // Internal Global - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, GetValueName(I)); - - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - Out << ";\n"; - } - } - - // Function declarations - Out << "\n/* Function Declarations */\n"; - Out << "double fmod(double, double);\n"; // Support for FP rem - Out << "float fmodf(float, float);\n"; - Out << "long double fmodl(long double, long double);\n"; - - // Store the intrinsics which will be declared/defined below. - SmallVector<const Function*, 8> intrinsicsToDefine; - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - // Don't print declarations for intrinsic functions. - // Store the used intrinsics, which need to be explicitly defined. 
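Concretely, the declaration section this code prints for a module with one external i32 global and one thread-local external comes out roughly as below; the identifier spellings are illustrative, since real names pass through GetValueName and the mangler:

    /* External Global Variable Declarations */
    extern unsigned int counter;
    extern __thread unsigned int tls_counter;

    /* Function Declarations */
    double fmod(double, double);
    float fmodf(float, float);
    long double fmodl(long double, long double);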
- if (I->isIntrinsic()) { - switch (I->getIntrinsicID()) { - default: - break; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - intrinsicsToDefine.push_back(I); - break; - } - continue; - } - - if (I->getName() == "setjmp" || - I->getName() == "longjmp" || I->getName() == "_setjmp") - continue; - - if (I->hasExternalWeakLinkage()) - Out << "extern "; - printFunctionSignature(I, true); - if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (StaticCtors.count(I)) - Out << " __ATTRIBUTE_CTOR__"; - if (StaticDtors.count(I)) - Out << " __ATTRIBUTE_DTOR__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - if (I->hasName() && I->getName()[0] == 1) - Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")"; - - Out << ";\n"; - } - - // Output the global variable declarations - if (!M.global_empty()) { - Out << "\n\n/* Global Variable Declarations */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - // Ignore special globals, such as debug info. - if (getGlobalVariableClass(I)) - continue; - - if (I->hasLocalLinkage()) - Out << "static "; - else - Out << "extern "; - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, - GetValueName(I)); - - if (I->hasLinkOnceLinkage()) - Out << " __attribute__((common))"; - else if (I->hasCommonLinkage()) // FIXME is this right? - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasWeakLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasExternalWeakLinkage()) - Out << " __EXTERNAL_WEAK__"; - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - Out << ";\n"; - } - } - - // Output the global variable definitions and contents... - if (!M.global_empty()) { - Out << "\n\n/* Global Variable Definitions and Initialization */\n"; - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!I->isDeclaration()) { - // Ignore special globals, such as debug info. - if (getGlobalVariableClass(I)) - continue; - - if (I->hasLocalLinkage()) - Out << "static "; - else if (I->hasDLLImportLinkage()) - Out << "__declspec(dllimport) "; - else if (I->hasDLLExportLinkage()) - Out << "__declspec(dllexport) "; - - // Thread Local Storage - if (I->isThreadLocal()) - Out << "__thread "; - - printType(Out, I->getType()->getElementType(), false, - GetValueName(I)); - if (I->hasLinkOnceLinkage()) - Out << " __attribute__((common))"; - else if (I->hasWeakLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - else if (I->hasCommonLinkage()) - Out << " __ATTRIBUTE_WEAK__"; - - if (I->hasHiddenVisibility()) - Out << " __HIDDEN__"; - - // If the initializer is not null, emit the initializer. If it is null, - // we try to avoid emitting large amounts of zeros. The problem with - // this, however, occurs when the variable has weak linkage. In this - // case, the assembler will complain about the variable being both weak - // and common, so we disable this optimization. - // FIXME common linkage should avoid this problem. - if (!I->getInitializer()->isNullValue()) { - Out << " = " ; - writeOperand(I->getInitializer(), true); - } else if (I->hasWeakLinkage()) { - // We have to specify an initializer, but it doesn't have to be - // complete. If the value is an aggregate, print out { 0 }, and let - // the compiler figure out the rest of the zeros. 
- Out << " = " ; - if (I->getInitializer()->getType()->isStructTy() || - I->getInitializer()->getType()->isVectorTy()) { - Out << "{ 0 }"; - } else if (I->getInitializer()->getType()->isArrayTy()) { - // As with structs and vectors, but with an extra set of braces - // because arrays are wrapped in structs. - Out << "{ { 0 } }"; - } else { - // Just print it out normally. - writeOperand(I->getInitializer(), true); - } - } - Out << ";\n"; - } - } - - if (!M.empty()) - Out << "\n\n/* Function Bodies */\n"; - - // Emit some helper functions for dealing with FCMP instruction's - // predicates - Out << "static inline int llvm_fcmp_ord(double X, double Y) { "; - Out << "return X == X && Y == Y; }\n"; - Out << "static inline int llvm_fcmp_uno(double X, double Y) { "; - Out << "return X != X || Y != Y; }\n"; - Out << "static inline int llvm_fcmp_ueq(double X, double Y) { "; - Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_une(double X, double Y) { "; - Out << "return X != Y; }\n"; - Out << "static inline int llvm_fcmp_ult(double X, double Y) { "; - Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_ugt(double X, double Y) { "; - Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_ule(double X, double Y) { "; - Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_uge(double X, double Y) { "; - Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n"; - Out << "static inline int llvm_fcmp_oeq(double X, double Y) { "; - Out << "return X == Y ; }\n"; - Out << "static inline int llvm_fcmp_one(double X, double Y) { "; - Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n"; - Out << "static inline int llvm_fcmp_olt(double X, double Y) { "; - Out << "return X < Y ; }\n"; - Out << "static inline int llvm_fcmp_ogt(double X, double Y) { "; - Out << "return X > Y ; }\n"; - Out << "static inline int llvm_fcmp_ole(double X, double Y) { "; - Out << "return X <= Y ; }\n"; - Out << "static inline int llvm_fcmp_oge(double X, double Y) { "; - Out << "return X >= Y ; }\n"; - - // Emit definitions of the intrinsics. - for (SmallVector<const Function*, 8>::const_iterator - I = intrinsicsToDefine.begin(), - E = intrinsicsToDefine.end(); I != E; ++I) { - printIntrinsicDefinition(**I, Out); - } - - return false; -} - - -/// Output all floating point constants that cannot be printed accurately... -void CWriter::printFloatingPointConstants(Function &F) { - // Scan the module for floating point constants. If any FP constant is used - // in the function, we want to redirect it here so that we do not depend on - // the precision of the printed form, unless the printed form preserves - // precision. - // - for (constant_iterator I = constant_begin(&F), E = constant_end(&F); - I != E; ++I) - printFloatingPointConstants(*I); - - Out << '\n'; -} - -void CWriter::printFloatingPointConstants(const Constant *C) { - // If this is a constant expression, recursively check for constant fp values. - if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) - printFloatingPointConstants(CE->getOperand(i)); - return; - } - - // Otherwise, check for a FP constant that we need to print. - const ConstantFP *FPC = dyn_cast<ConstantFP>(C); - if (FPC == 0 || - // Do not put in FPConstantMap if safe. - isFPCSafeToPrint(FPC) || - // Already printed this constant? 
- FPConstantMap.count(FPC)) - return; - - FPConstantMap[FPC] = FPCounter; // Number the FP constants - - if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) { - double Val = FPC->getValueAPF().convertToDouble(); - uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); - Out << "static const ConstantDoubleTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "ULL; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) { - float Val = FPC->getValueAPF().convertToFloat(); - uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt(). - getZExtValue(); - Out << "static const ConstantFloatTy FPConstant" << FPCounter++ - << " = 0x" << utohexstr(i) - << "U; /* " << Val << " */\n"; - } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) { - // api needed to prevent premature destruction - APInt api = FPC->getValueAPF().bitcastToAPInt(); - const uint64_t *p = api.getRawData(); - Out << "static const ConstantFP80Ty FPConstant" << FPCounter++ - << " = { 0x" << utohexstr(p[0]) - << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}" - << "}; /* Long double constant */\n"; - } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) || - FPC->getType() == Type::getFP128Ty(FPC->getContext())) { - APInt api = FPC->getValueAPF().bitcastToAPInt(); - const uint64_t *p = api.getRawData(); - Out << "static const ConstantFP128Ty FPConstant" << FPCounter++ - << " = { 0x" - << utohexstr(p[0]) << ", 0x" << utohexstr(p[1]) - << "}; /* Long double constant */\n"; - - } else { - llvm_unreachable("Unknown float type!"); - } -} - - -/// printSymbolTable - Run through symbol table looking for type names. If a -/// type name is found, emit its declaration... -/// -void CWriter::printModuleTypes() { - Out << "/* Helper union for bitcasts */\n"; - Out << "typedef union {\n"; - Out << " unsigned int Int32;\n"; - Out << " unsigned long long Int64;\n"; - Out << " float Float;\n"; - Out << " double Double;\n"; - Out << "} llvmBitCastUnion;\n"; - - // Get all of the struct types used in the module. - std::vector<StructType*> StructTypes; - TheModule->findUsedStructTypes(StructTypes); - - if (StructTypes.empty()) return; - - Out << "/* Structure forward decls */\n"; - - unsigned NextTypeID = 0; - - // If any of them are missing names, add a unique ID to UnnamedStructIDs. - // Print out forward declarations for structure types. - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *ST = StructTypes[i]; - - if (ST->isLiteral() || ST->getName().empty()) - UnnamedStructIDs[ST] = NextTypeID++; - - std::string Name = getStructName(ST); - - Out << "typedef struct " << Name << ' ' << Name << ";\n"; - } - - Out << '\n'; - - // Keep track of which structures have been printed so far. - SmallPtrSet<Type *, 16> StructPrinted; - - // Loop over all structures then push them into the stack so they are - // printed in the correct order. - // - Out << "/* Structure contents */\n"; - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) - if (StructTypes[i]->isStructTy()) - // Only print out used types! - printContainedStructs(StructTypes[i], StructPrinted); -} - -// Push the struct onto the stack and recursively push all structs -// this one depends on. -// -// TODO: Make this work properly with vector types -// -void CWriter::printContainedStructs(Type *Ty, - SmallPtrSet<Type *, 16> &StructPrinted) { - // Don't walk through pointers. 
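A worked instance of the double case in printFloatingPointConstants: 1.5 has the IEEE-754 bit pattern 0x3FF8000000000000, so the emitted line is the one sketched below (counter suffix illustrative). Each use site then reinterprets the stored bits through a pointer cast along the lines of *(double*)&FPConstant0, keeping the value independent of the C compiler's decimal parsing:

    typedef unsigned long long ConstantDoubleTy;
    static const ConstantDoubleTy FPConstant0 = 0x3FF8000000000000ULL; /* 1.5 */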
- if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) - return; - - // Print all contained types first. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - printContainedStructs(*I, StructPrinted); - - if (StructType *ST = dyn_cast<StructType>(Ty)) { - // Check to see if we have already printed this struct. - if (!StructPrinted.insert(Ty)) return; - - // Print structure type out. - printType(Out, ST, false, getStructName(ST), true); - Out << ";\n\n"; - } -} - -void CWriter::printFunctionSignature(const Function *F, bool Prototype) { - /// isStructReturn - Should this function actually return a struct by-value? - bool isStructReturn = F->hasStructRetAttr(); - - if (F->hasLocalLinkage()) Out << "static "; - if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) "; - if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) "; - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Out << "__attribute__((stdcall)) "; - break; - case CallingConv::X86_FastCall: - Out << "__attribute__((fastcall)) "; - break; - case CallingConv::X86_ThisCall: - Out << "__attribute__((thiscall)) "; - break; - default: - break; - } - - // Loop over the arguments, printing them... - FunctionType *FT = cast<FunctionType>(F->getFunctionType()); - const AttrListPtr &PAL = F->getAttributes(); - - std::string tstr; - raw_string_ostream FunctionInnards(tstr); - - // Print out the name... - FunctionInnards << GetValueName(F) << '('; - - bool PrintedArg = false; - if (!F->isDeclaration()) { - if (!F->arg_empty()) { - Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. - if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - std::string ArgName; - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - if (I->hasName() || !Prototype) - ArgName = GetValueName(I); - else - ArgName = ""; - Type *ArgTy = I->getType(); - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - ByValParams.insert(I); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), - ArgName); - PrintedArg = true; - ++Idx; - } - } - } else { - // Loop over the arguments, printing them. - FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end(); - unsigned Idx = 1; - - // If this is a struct-return function, don't print the hidden - // struct-return argument. - if (isStructReturn) { - assert(I != E && "Invalid struct return function!"); - ++I; - ++Idx; - } - - for (; I != E; ++I) { - if (PrintedArg) FunctionInnards << ", "; - Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(ArgTy->isPointerTy()); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt)); - PrintedArg = true; - ++Idx; - } - } - - if (!PrintedArg && FT->isVarArg()) { - FunctionInnards << "int vararg_dummy_arg"; - PrintedArg = true; - } - - // Finish printing arguments... if this is a vararg function, print the ..., - // unless there are no known types, in which case, we just emit (). - // - if (FT->isVarArg() && PrintedArg) { - FunctionInnards << ",..."; // Output varargs portion of signature! - } else if (!FT->isVarArg() && !PrintedArg) { - FunctionInnards << "void"; // ret() -> ret(void) in C. 
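Putting printFunctionSignature together: for an internal function i32 @add(i32 signext %a, i32 %b) with the X86_StdCall convention, the emitted prototype looks roughly like the sketch below. Integer values print as unsigned unless the SExt attribute is present, and the name is illustrative:

    static __attribute__((stdcall)) unsigned int add(int a, unsigned int b);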
- }
- FunctionInnards << ')';
-
- // Get the return type for the function.
- Type *RetTy;
- if (!isStructReturn)
- RetTy = F->getReturnType();
- else {
- // If this is a struct-return function, print the struct-return type.
- RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
- }
-
- // Print out the return type and the signature built above.
- printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
- FunctionInnards.str());
-}
-
-static inline bool isFPIntBitCast(const Instruction &I) {
- if (!isa<BitCastInst>(I))
- return false;
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DstTy = I.getType();
- return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
- (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
-}
-
-void CWriter::printFunction(Function &F) {
- /// isStructReturn - Should this function actually return a struct by-value?
- bool isStructReturn = F.hasStructRetAttr();
-
- printFunctionSignature(&F, false);
- Out << " {\n";
-
- // If this is a struct return function, handle the result with magic.
- if (isStructReturn) {
- Type *StructTy =
- cast<PointerType>(F.arg_begin()->getType())->getElementType();
- Out << " ";
- printType(Out, StructTy, false, "StructReturn");
- Out << "; /* Struct return temporary */\n";
-
- Out << " ";
- printType(Out, F.arg_begin()->getType(), false,
- GetValueName(F.arg_begin()));
- Out << " = &StructReturn;\n";
- }
-
- bool PrintedVar = false;
-
- // print local variable information for the function
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- if (const AllocaInst *AI = isDirectAlloca(&*I)) {
- Out << " ";
- printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
- Out << "; /* Address-exposed local */\n";
- PrintedVar = true;
- } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
- !isInlinableInst(*I)) {
- Out << " ";
- printType(Out, I->getType(), false, GetValueName(&*I));
- Out << ";\n";
-
- if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
- Out << " ";
- printType(Out, I->getType(), false,
- GetValueName(&*I)+"__PHI_TEMPORARY");
- Out << ";\n";
- }
- PrintedVar = true;
- }
- // We need a temporary for the BitCast to use so it can pluck a value out
- // of a union to do the BitCast. This is separate from the need for a
- // variable to hold the result of the BitCast.
- if (isFPIntBitCast(*I)) {
- Out << " llvmBitCastUnion " << GetValueName(&*I)
- << "__BITCAST_TEMPORARY;\n";
- PrintedVar = true;
- }
- }
-
- if (PrintedVar)
- Out << '\n';
-
- if (F.hasExternalLinkage() && F.getName() == "main")
- Out << " CODE_FOR_MAIN();\n";
-
- // print the basic blocks
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (L->getHeader() == BB && L->getParentLoop() == 0)
- printLoop(L);
- } else {
- printBasicBlock(BB);
- }
- }
-
- Out << "}\n\n";
-}
-
-void CWriter::printLoop(Loop *L) {
- Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
- << "' to make GCC happy */\n";
- for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
- BasicBlock *BB = L->getBlocks()[i];
- Loop *BBLoop = LI->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
- printLoop(BBLoop);
- }
- Out << " } while (1); /* end of syntactic loop '"
- << L->getHeader()->getName() << "' */\n";
-}
-
-void CWriter::printBasicBlock(BasicBlock *BB) {
-
- // Don't print the label for the basic block if there are no uses, or if
- // the only terminator use is the predecessor basic block's terminator.
- // We have to scan the use list because PHI nodes use basic blocks too but
- // do not require a label to be generated.
- //
- bool NeedsLabel = false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (isGotoCodeNecessary(*PI, BB)) {
- NeedsLabel = true;
- break;
- }
-
- if (NeedsLabel) Out << GetValueName(BB) << ":\n";
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
- ++II) {
- if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
- if (II->getType() != Type::getVoidTy(BB->getContext()) &&
- !isInlineAsm(*II))
- outputLValue(II);
- else
- Out << " ";
- writeInstComputationInline(*II);
- Out << ";\n";
- }
- }
-
- // Don't emit prefix or suffix for the terminator.
- visit(*BB->getTerminator());
-}
-
-
-// Specific Instruction type classes... note that all of the casts are
-// necessary because we use the instruction classes as opaque types...
-//
-void CWriter::visitReturnInst(ReturnInst &I) {
- // If this is a struct return function, return the temporary struct.
- bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
-
- if (isStructReturn) {
- Out << " return StructReturn;\n";
- return;
- }
-
- // Don't output a void return if this is the last basic block in the function
- if (I.getNumOperands() == 0 &&
- &*--I.getParent()->getParent()->end() == I.getParent() &&
- !(I.getParent()->size() == 1)) {
- return;
- }
-
- Out << " return";
- if (I.getNumOperands()) {
- Out << ' ';
- writeOperand(I.getOperand(0));
- }
- Out << ";\n";
-}
-
-void CWriter::visitSwitchInst(SwitchInst &SI) {
-
- Value* Cond = SI.getCondition();
-
- Out << " switch (";
- writeOperand(Cond);
- Out << ") {\n default:\n";
- printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
- printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
- Out << ";\n";
-
- // Skip the first item since that's the default case.
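The do/while wrapper from printLoop and the goto-based branching from printBranchToBlock combine into bodies shaped like this sketch of a counting loop (labels and the exit test are illustrative, and PHI temporaries are omitted):

    void f(unsigned int n) {
      unsigned int i;
      i = 0u;
      do {     /* Syntactic loop 'bb' to make GCC happy */
      bb:
        i = i + 1u;
        if (i < n) {
          goto bb;        /* back edge to the loop header */
        } else {
          goto bb_exit;
        }
      } while (1); /* end of syntactic loop 'bb' */
    bb_exit:
      return;
    }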
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
- ConstantInt* CaseVal = i.getCaseValue();
- BasicBlock* Succ = i.getCaseSuccessor();
- Out << " case ";
- writeOperand(CaseVal);
- Out << ":\n";
- printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
- printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) ==
- llvm::next(Function::iterator(SI.getParent())))
- Out << " break;\n";
- }
-
- Out << " }\n";
-}
-
-void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
- Out << " goto *(void*)(";
- writeOperand(IBI.getOperand(0));
- Out << ");\n";
-}
-
-void CWriter::visitUnreachableInst(UnreachableInst &I) {
- Out << " /*UNREACHABLE*/;\n";
-}
-
-bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
- /// FIXME: This should be reenabled, but only once loop reordering is safe!
- return true;
-
- if (llvm::next(Function::iterator(From)) != Function::iterator(To))
- return true; // Not the direct successor, we need a goto.
-
- //isa<SwitchInst>(From->getTerminator())
-
- if (LI->getLoopFor(From) != LI->getLoopFor(To))
- return true;
- return false;
-}
-
-void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
- BasicBlock *Successor,
- unsigned Indent) {
- for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- // Now we have to do the printing.
- Value *IV = PN->getIncomingValueForBlock(CurBlock);
- if (!isa<UndefValue>(IV)) {
- Out << std::string(Indent, ' ');
- Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
- writeOperand(IV);
- Out << "; /* for PHI node */\n";
- }
- }
-}
-
-void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
- unsigned Indent) {
- if (isGotoCodeNecessary(CurBB, Succ)) {
- Out << std::string(Indent, ' ') << " goto ";
- writeOperand(Succ);
- Out << ";\n";
- }
-}
-
-// Branch instruction printing - Avoid printing out a branch to a basic block
-// that immediately succeeds the current one.
-//
-void CWriter::visitBranchInst(BranchInst &I) {
-
- if (I.isConditional()) {
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
- Out << " if (";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
-
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
- Out << " } else {\n";
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
- } else {
- // First goto not necessary, assume second one is...
- Out << " if (!";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
-
- Out << " }\n";
- } else {
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
- }
- Out << "\n";
-}
-
-// PHI nodes get copied into temporary values at the end of predecessor basic
-// blocks. We now need to copy these temporary values into the REAL value for
-// the PHI.
-void CWriter::visitPHINode(PHINode &I) {
- writeOperand(&I);
- Out << "__PHI_TEMPORARY";
-}
-
-
-void CWriter::visitBinaryOperator(Instruction &I) {
- // binary instructions, shift instructions, setCond instructions.
- assert(!I.getType()->isPointerTy());
-
- // We must cast the results of binary operations which might be promoted.
- bool needsCast = false; - if ((I.getType() == Type::getInt8Ty(I.getContext())) || - (I.getType() == Type::getInt16Ty(I.getContext())) - || (I.getType() == Type::getFloatTy(I.getContext()))) { - needsCast = true; - Out << "(("; - printType(Out, I.getType(), false); - Out << ")("; - } - - // If this is a negation operation, print it out as such. For FP, we don't - // want to print "-0.0 - X". - if (BinaryOperator::isNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I))); - Out << ")"; - } else if (BinaryOperator::isFNeg(&I)) { - Out << "-("; - writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I))); - Out << ")"; - } else if (I.getOpcode() == Instruction::FRem) { - // Output a call to fmod/fmodf instead of emitting a%b - if (I.getType() == Type::getFloatTy(I.getContext())) - Out << "fmodf("; - else if (I.getType() == Type::getDoubleTy(I.getContext())) - Out << "fmod("; - else // all 3 flavors of long double - Out << "fmodl("; - writeOperand(I.getOperand(0)); - Out << ", "; - writeOperand(I.getOperand(1)); - Out << ")"; - } else { - - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain instructions require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I.getOpcode()); - - switch (I.getOpcode()) { - case Instruction::Add: - case Instruction::FAdd: Out << " + "; break; - case Instruction::Sub: - case Instruction::FSub: Out << " - "; break; - case Instruction::Mul: - case Instruction::FMul: Out << " * "; break; - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: Out << " % "; break; - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: Out << " / "; break; - case Instruction::And: Out << " & "; break; - case Instruction::Or: Out << " | "; break; - case Instruction::Xor: Out << " ^ "; break; - case Instruction::Shl : Out << " << "; break; - case Instruction::LShr: - case Instruction::AShr: Out << " >> "; break; - default: -#ifndef NDEBUG - errs() << "Invalid operator type!" << I; -#endif - llvm_unreachable(0); - } - - writeOperandWithCast(I.getOperand(1), I.getOpcode()); - if (NeedsClosingParens) - Out << "))"; - } - - if (needsCast) { - Out << "))"; - } -} - -void CWriter::visitICmpInst(ICmpInst &I) { - // We must cast the results of icmp which might be promoted. - bool needsCast = false; - - // Write out the cast of the instruction's value back to the proper type - // if necessary. - bool NeedsClosingParens = writeInstructionCast(I); - - // Certain icmp predicate require the operand to be forced to a specific type - // so we use writeOperandWithCast here instead of writeOperand. Similarly - // below for operand 1 - writeOperandWithCast(I.getOperand(0), I); - - switch (I.getPredicate()) { - case ICmpInst::ICMP_EQ: Out << " == "; break; - case ICmpInst::ICMP_NE: Out << " != "; break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: Out << " <= "; break; - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: Out << " >= "; break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: Out << " < "; break; - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: Out << " > "; break; - default: -#ifndef NDEBUG - errs() << "Invalid icmp predicate!" 
<< I; -#endif - llvm_unreachable(0); - } - - writeOperandWithCast(I.getOperand(1), I); - if (NeedsClosingParens) - Out << "))"; - - if (needsCast) { - Out << "))"; - } -} - -void CWriter::visitFCmpInst(FCmpInst &I) { - if (I.getPredicate() == FCmpInst::FCMP_FALSE) { - Out << "0"; - return; - } - if (I.getPredicate() == FCmpInst::FCMP_TRUE) { - Out << "1"; - return; - } - - const char* op = 0; - switch (I.getPredicate()) { - default: llvm_unreachable("Illegal FCmp predicate"); - case FCmpInst::FCMP_ORD: op = "ord"; break; - case FCmpInst::FCMP_UNO: op = "uno"; break; - case FCmpInst::FCMP_UEQ: op = "ueq"; break; - case FCmpInst::FCMP_UNE: op = "une"; break; - case FCmpInst::FCMP_ULT: op = "ult"; break; - case FCmpInst::FCMP_ULE: op = "ule"; break; - case FCmpInst::FCMP_UGT: op = "ugt"; break; - case FCmpInst::FCMP_UGE: op = "uge"; break; - case FCmpInst::FCMP_OEQ: op = "oeq"; break; - case FCmpInst::FCMP_ONE: op = "one"; break; - case FCmpInst::FCMP_OLT: op = "olt"; break; - case FCmpInst::FCMP_OLE: op = "ole"; break; - case FCmpInst::FCMP_OGT: op = "ogt"; break; - case FCmpInst::FCMP_OGE: op = "oge"; break; - } - - Out << "llvm_fcmp_" << op << "("; - // Write the first operand - writeOperand(I.getOperand(0)); - Out << ", "; - // Write the second operand - writeOperand(I.getOperand(1)); - Out << ")"; -} - -static const char * getFloatBitCastField(Type *Ty) { - switch (Ty->getTypeID()) { - default: llvm_unreachable("Invalid Type"); - case Type::FloatTyID: return "Float"; - case Type::DoubleTyID: return "Double"; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits <= 32) - return "Int32"; - else - return "Int64"; - } - } -} - -void CWriter::visitCastInst(CastInst &I) { - Type *DstTy = I.getType(); - Type *SrcTy = I.getOperand(0)->getType(); - if (isFPIntBitCast(I)) { - Out << '('; - // These int<->float and long<->double casts need to be handled specially - Out << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getOperand(0)->getType()) << " = "; - writeOperand(I.getOperand(0)); - Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY." - << getFloatBitCastField(I.getType()); - Out << ')'; - return; - } - - Out << '('; - printCast(I.getOpcode(), SrcTy, DstTy); - - // Make a sext from i1 work by subtracting the i1 from 0 (an int). - if (SrcTy == Type::getInt1Ty(I.getContext()) && - I.getOpcode() == Instruction::SExt) - Out << "0-"; - - writeOperand(I.getOperand(0)); - - if (DstTy == Type::getInt1Ty(I.getContext()) && - (I.getOpcode() == Instruction::Trunc || - I.getOpcode() == Instruction::FPToUI || - I.getOpcode() == Instruction::FPToSI || - I.getOpcode() == Instruction::PtrToInt)) { - // Make sure we really get a trunc to bool by anding the operand with 1 - Out << "&1u"; - } - Out << ')'; -} - -void CWriter::visitSelectInst(SelectInst &I) { - Out << "(("; - writeOperand(I.getCondition()); - Out << ") ? ("; - writeOperand(I.getTrueValue()); - Out << ") : ("; - writeOperand(I.getFalseValue()); - Out << "))"; -} - -// Returns the macro name or value of the max or min of an integer type -// (as defined in limits.h). 
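The llvm_fcmp_* helpers that doInitialization emitted earlier carry the NaN semantics here: C's native comparisons are all ordered, so the unordered predicates must test for NaN explicitly. For example, %c = fcmp ueq double %a, %b prints as c = llvm_fcmp_ueq(a, b), backed by the emitted definitions:

    static inline int llvm_fcmp_uno(double X, double Y) { return X != X || Y != Y; }
    static inline int llvm_fcmp_ueq(double X, double Y) { return X == Y || llvm_fcmp_uno(X, Y); }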
-static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax, - raw_ostream &Out) { - const char* type; - const char* sprefix = ""; - - unsigned NumBits = Ty.getBitWidth(); - if (NumBits <= 8) { - type = "CHAR"; - sprefix = "S"; - } else if (NumBits <= 16) { - type = "SHRT"; - } else if (NumBits <= 32) { - type = "INT"; - } else if (NumBits <= 64) { - type = "LLONG"; - } else { - llvm_unreachable("Bit widths > 64 not implemented yet"); - } - - if (isSigned) - Out << sprefix << type << (isMax ? "_MAX" : "_MIN"); - else - Out << "U" << type << (isMax ? "_MAX" : "0"); -} - -#ifndef NDEBUG -static bool isSupportedIntegerSize(IntegerType &T) { - return T.getBitWidth() == 8 || T.getBitWidth() == 16 || - T.getBitWidth() == 32 || T.getBitWidth() == 64; -} -#endif - -void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) { - FunctionType *funT = F.getFunctionType(); - Type *retT = F.getReturnType(); - IntegerType *elemT = cast<IntegerType>(funT->getParamType(1)); - - assert(isSupportedIntegerSize(*elemT) && - "CBackend does not support arbitrary size integers."); - assert(cast<StructType>(retT)->getElementType(0) == elemT && - elemT == funT->getParamType(0) && funT->getNumParams() == 2); - - switch (F.getIntrinsicID()) { - default: - llvm_unreachable("Unsupported Intrinsic."); - case Intrinsic::uadd_with_overflow: - // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) { - // Rty r; - // r.field0 = a + b; - // r.field1 = (r.field0 < a); - // return r; - // } - Out << "static inline "; - printType(Out, retT); - Out << GetValueName(&F); - Out << "("; - printSimpleType(Out, elemT, false); - Out << "a,"; - printSimpleType(Out, elemT, false); - Out << "b) {\n "; - printType(Out, retT); - Out << "r;\n"; - Out << " r.field0 = a + b;\n"; - Out << " r.field1 = (r.field0 < a);\n"; - Out << " return r;\n}\n"; - break; - - case Intrinsic::sadd_with_overflow: - // static inline Rty sadd_ixx(ixx a, ixx b) { - // Rty r; - // r.field1 = (b > 0 && a > XX_MAX - b) || - // (b < 0 && a < XX_MIN - b); - // r.field0 = r.field1 ? 0 : a + b; - // return r; - // } - Out << "static "; - printType(Out, retT); - Out << GetValueName(&F); - Out << "("; - printSimpleType(Out, elemT, true); - Out << "a,"; - printSimpleType(Out, elemT, true); - Out << "b) {\n "; - printType(Out, retT); - Out << "r;\n"; - Out << " r.field1 = (b > 0 && a > "; - printLimitValue(*elemT, true, true, Out); - Out << " - b) || (b < 0 && a < "; - printLimitValue(*elemT, true, false, Out); - Out << " - b);\n"; - Out << " r.field0 = r.field1 ? 0 : a + b;\n"; - Out << " return r;\n}\n"; - break; - } -} - -void CWriter::lowerIntrinsics(Function &F) { - // This is used to keep track of intrinsics that get generated to a lowered - // function. We must generate the prototypes before the function body which - // will only be expanded on first use (by the loop below). - std::vector<Function*> prototypesToGen; - - // Examine all the instructions in this function to find the intrinsics that - // need to be lowered. 
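Instantiating the sadd_with_overflow template in printIntrinsicDefinition for i32 yields code along these lines; the struct and function names below are illustrative stand-ins for what printType and GetValueName produce, and limits.h is already pulled in by the preamble:

    #include <limits.h>

    struct l_overflow_pair { unsigned int field0; unsigned int field1; };

    static struct l_overflow_pair llvm_sadd_with_overflow_i32(int a, int b) {
      struct l_overflow_pair r;
      /* detect signed overflow without performing it */
      r.field1 = (b > 0 && a > INT_MAX - b) || (b < 0 && a < INT_MIN - b);
      r.field0 = r.field1 ? 0 : a + b;
      return r;
    }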
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) - if (CallInst *CI = dyn_cast<CallInst>(I++)) - if (Function *F = CI->getCalledFunction()) - switch (F->getIntrinsicID()) { - case Intrinsic::not_intrinsic: - case Intrinsic::vastart: - case Intrinsic::vacopy: - case Intrinsic::vaend: - case Intrinsic::returnaddress: - case Intrinsic::frameaddress: - case Intrinsic::setjmp: - case Intrinsic::longjmp: - case Intrinsic::prefetch: - case Intrinsic::powi: - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - case Intrinsic::ppc_altivec_lvsl: - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - // We directly implement these intrinsics - break; - default: - // If this is an intrinsic that directly corresponds to a GCC - // builtin, we handle it. - const char *BuiltinName = ""; -#define GET_GCC_BUILTIN_NAME -#include "llvm/Intrinsics.gen" -#undef GET_GCC_BUILTIN_NAME - // If we handle it, don't lower it. - if (BuiltinName[0]) break; - - // All other intrinsic calls we must lower. - Instruction *Before = 0; - if (CI != &BB->front()) - Before = prior(BasicBlock::iterator(CI)); - - IL->LowerIntrinsicCall(CI); - if (Before) { // Move iterator to instruction after call - I = Before; ++I; - } else { - I = BB->begin(); - } - // If the intrinsic got lowered to another call, and that call has - // a definition then we need to make sure its prototype is emitted - // before any calls to it. - if (CallInst *Call = dyn_cast<CallInst>(I)) - if (Function *NewF = Call->getCalledFunction()) - if (!NewF->isDeclaration()) - prototypesToGen.push_back(NewF); - - break; - } - - // We may have collected some prototypes to emit in the loop above. - // Emit them now, before the function that uses them is emitted. But, - // be careful not to emit them twice. - std::vector<Function*>::iterator I = prototypesToGen.begin(); - std::vector<Function*>::iterator E = prototypesToGen.end(); - for ( ; I != E; ++I) { - if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) { - Out << '\n'; - printFunctionSignature(*I, true); - Out << ";\n"; - } - } -} - -void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getCalledValue())) - return visitInlineAsm(I); - - bool WroteCallee = false; - - // Handle intrinsic function calls first... - if (Function *F = I.getCalledFunction()) - if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) - if (visitBuiltinCall(I, ID, WroteCallee)) - return; - - Value *Callee = I.getCalledValue(); - - PointerType *PTy = cast<PointerType>(Callee->getType()); - FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); - - // If this is a call to a struct-return function, assign to the first - // parameter instead of passing it to the call. - const AttrListPtr &PAL = I.getAttributes(); - bool hasByVal = I.hasByValArgument(); - bool isStructRet = I.hasStructRetAttr(); - if (isStructRet) { - writeOperandDeref(I.getArgOperand(0)); - Out << " = "; - } - - if (I.isTailCall()) Out << " /*tail*/ "; - - if (!WroteCallee) { - // If this is an indirect call to a struct return function, we need to cast - // the pointer. Ditto for indirect calls with byval arguments. - bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee); - - // GCC is a real PITA. 
It does not permit codegening casts of functions to - // function pointers if they are in a call (it generates a trap instruction - // instead!). We work around this by inserting a cast to void* in between - // the function and the function pointer cast. Unfortunately, we can't just - // form the constant expression here, because the folder will immediately - // nuke it. - // - // Note finally, that this is completely unsafe. ANSI C does not guarantee - // that void* and function pointers have the same size. :( To deal with this - // in the common case, we handle casts where the number of arguments passed - // match exactly. - // - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee)) - if (CE->isCast()) - if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) { - NeedsCast = true; - Callee = RF; - } - - if (NeedsCast) { - // Ok, just cast the pointer type. - Out << "(("; - if (isStructRet) - printStructReturnPointerFunctionType(Out, PAL, - cast<PointerType>(I.getCalledValue()->getType())); - else if (hasByVal) - printType(Out, I.getCalledValue()->getType(), false, "", true, PAL); - else - printType(Out, I.getCalledValue()->getType()); - Out << ")(void*)"; - } - writeOperand(Callee); - if (NeedsCast) Out << ')'; - } - - Out << '('; - - bool PrintedArg = false; - if(FTy->isVarArg() && !FTy->getNumParams()) { - Out << "0 /*dummy arg*/"; - PrintedArg = true; - } - - unsigned NumDeclaredParams = FTy->getNumParams(); - CallSite CS(&I); - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - unsigned ArgNo = 0; - if (isStructRet) { // Skip struct return argument. - ++AI; - ++ArgNo; - } - - - for (; AI != AE; ++AI, ++ArgNo) { - if (PrintedArg) Out << ", "; - if (ArgNo < NumDeclaredParams && - (*AI)->getType() != FTy->getParamType(ArgNo)) { - Out << '('; - printType(Out, FTy->getParamType(ArgNo), - /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt)); - Out << ')'; - } - // Check if the argument is expected to be passed by value. - if (I.paramHasAttr(ArgNo+1, Attribute::ByVal)) - writeOperandDeref(*AI); - else - writeOperand(*AI); - PrintedArg = true; - } - Out << ')'; -} - -/// visitBuiltinCall - Handle the call to the specified builtin. Returns true -/// if the entire call is handled, return false if it wasn't handled, and -/// optionally set 'WroteCallee' if the callee has already been printed out. -bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, - bool &WroteCallee) { - switch (ID) { - default: { - // If this is an intrinsic that directly corresponds to a GCC - // builtin, we emit it here. - const char *BuiltinName = ""; - Function *F = I.getCalledFunction(); -#define GET_GCC_BUILTIN_NAME -#include "llvm/Intrinsics.gen" -#undef GET_GCC_BUILTIN_NAME - assert(BuiltinName[0] && "Unknown LLVM intrinsic!"); - - Out << BuiltinName; - WroteCallee = true; - return false; - } - case Intrinsic::vastart: - Out << "0; "; - - Out << "va_start(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - // Output the last argument to the enclosing function. 
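The void* detour described here shows up in the output as a double cast. A sketch of the emitted shape for an indirect call whose callee was cast from a function with a different prototype (types and names illustrative):

    unsigned int call_via(void *callee, void *arg) {
      /* cast to void* first so GCC will accept the function-pointer cast inside a call */
      return ((unsigned int (*)(void *))(void*)callee)(arg);
    }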
- if (I.getParent()->getParent()->arg_empty()) - Out << "vararg_dummy_arg"; - else - writeOperand(--I.getParent()->getParent()->arg_end()); - Out << ')'; - return true; - case Intrinsic::vaend: - if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { - Out << "0; va_end(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ')'; - } else { - Out << "va_end(*(va_list*)0)"; - } - return true; - case Intrinsic::vacopy: - Out << "0; "; - Out << "va_copy(*(va_list*)"; - writeOperand(I.getArgOperand(0)); - Out << ", *(va_list*)"; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::returnaddress: - Out << "__builtin_return_address("; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::frameaddress: - Out << "__builtin_frame_address("; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::powi: - Out << "__builtin_powi("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::setjmp: - Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0)); - Out << ')'; - return true; - case Intrinsic::longjmp: - Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ')'; - return true; - case Intrinsic::prefetch: - Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ", "; - writeOperand(I.getArgOperand(2)); - Out << ")"; - return true; - case Intrinsic::stacksave: - // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save() - // to work around GCC bugs (see PR1809). - Out << "0; *((void**)&" << GetValueName(&I) - << ") = __builtin_stack_save()"; - return true; - case Intrinsic::x86_sse_cmp_ss: - case Intrinsic::x86_sse_cmp_ps: - case Intrinsic::x86_sse2_cmp_sd: - case Intrinsic::x86_sse2_cmp_pd: - Out << '('; - printType(Out, I.getType()); - Out << ')'; - // Multiple GCC builtins multiplex onto this intrinsic. - switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { - default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); - case 0: Out << "__builtin_ia32_cmpeq"; break; - case 1: Out << "__builtin_ia32_cmplt"; break; - case 2: Out << "__builtin_ia32_cmple"; break; - case 3: Out << "__builtin_ia32_cmpunord"; break; - case 4: Out << "__builtin_ia32_cmpneq"; break; - case 5: Out << "__builtin_ia32_cmpnlt"; break; - case 6: Out << "__builtin_ia32_cmpnle"; break; - case 7: Out << "__builtin_ia32_cmpord"; break; - } - if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd) - Out << 'p'; - else - Out << 's'; - if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps) - Out << 's'; - else - Out << 'd'; - - Out << "("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ")"; - return true; - case Intrinsic::ppc_altivec_lvsl: - Out << '('; - printType(Out, I.getType()); - Out << ')'; - Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getArgOperand(0)); - Out << ")"; - return true; - case Intrinsic::uadd_with_overflow: - case Intrinsic::sadd_with_overflow: - Out << GetValueName(I.getCalledFunction()) << "("; - writeOperand(I.getArgOperand(0)); - Out << ", "; - writeOperand(I.getArgOperand(1)); - Out << ")"; - return true; - } -} - -//This converts the llvm constraint string to something gcc is expecting. 
-//TODO: work out platform independent constraints and factor those out
-// of the per target tables
-// handle multiple constraint codes
-std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
- assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
-
- // Grab the translation table from MCAsmInfo if it exists.
- const MCAsmInfo *TargetAsm;
- std::string Triple = TheModule->getTargetTriple();
- if (Triple.empty())
- Triple = llvm::sys::getDefaultTargetTriple();
-
- std::string E;
- if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TargetAsm = Match->createMCAsmInfo(Triple);
- else
- return c.Codes[0];
-
- const char *const *table = TargetAsm->getAsmCBE();
-
- // Search the translation table if it exists.
- for (int i = 0; table && table[i]; i += 2)
- if (c.Codes[0] == table[i]) {
- delete TargetAsm;
- return table[i+1];
- }
-
- // Default is identity.
- delete TargetAsm;
- return c.Codes[0];
-}
-
-//TODO: import logic from AsmPrinter.cpp
-static std::string gccifyAsm(std::string asmstr) {
- for (std::string::size_type i = 0; i != asmstr.size(); ++i)
- if (asmstr[i] == '\n')
- asmstr.replace(i, 1, "\\n");
- else if (asmstr[i] == '\t')
- asmstr.replace(i, 1, "\\t");
- else if (asmstr[i] == '$') {
- if (asmstr[i + 1] == '{') {
- std::string::size_type a = asmstr.find_first_of(':', i + 1);
- std::string::size_type b = asmstr.find_first_of('}', i + 1);
- std::string n = "%" +
- asmstr.substr(a + 1, b - a - 1) +
- asmstr.substr(i + 2, a - i - 2);
- asmstr.replace(i, b - i + 1, n);
- i += n.size() - 1;
- } else
- asmstr.replace(i, 1, "%");
- }
- else if (asmstr[i] == '%')//grr
- { asmstr.replace(i, 1, "%%"); ++i;}
-
- return asmstr;
-}
-
-//TODO: assumptions about what consumes arguments from the call are likely wrong
-// handle commutativity
-void CWriter::visitInlineAsm(CallInst &CI) {
- InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
- InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
-
- std::vector<std::pair<Value*, int> > ResultVals;
- if (CI.getType() == Type::getVoidTy(CI.getContext()))
- ;
- else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
- ResultVals.push_back(std::make_pair(&CI, (int)i));
- } else {
- ResultVals.push_back(std::make_pair(&CI, -1));
- }
-
- // Fix up the asm string for gcc and emit it.
- Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
- Out << " :";
-
- unsigned ValueCount = 0;
- bool IsFirst = true;
-
- // Convert over all the output constraints.
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
-
- if (I->Type != InlineAsm::isOutput) {
- ++ValueCount;
- continue; // Ignore non-output constraints.
- }
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
- if (!IsFirst)
- Out << ", ";
- IsFirst = false;
-
- // Unpack the dest.
- Value *DestVal;
- int DestValNo = -1;
-
- if (ValueCount < ResultVals.size()) {
- DestVal = ResultVals[ValueCount].first;
- DestValNo = ResultVals[ValueCount].second;
- } else
- DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
- if (I->isEarlyClobber)
- C = "&"+C;
-
- Out << "\"=" << C << "\"(" << GetValueName(DestVal);
- if (DestValNo != -1)
- Out << ".field" << DestValNo; // Multiple retvals.
- Out << ")";
- ++ValueCount;
- }
-
-
- // Convert over all the input constraints.
- Out << "\n :"; - IsFirst = true; - ValueCount = 0; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isInput) { - ++ValueCount; - continue; // Ignore non-input constraints. - } - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); - - Out << "\"" << C << "\"("; - if (!I->isIndirect) - writeOperand(SrcVal); - else - writeOperandDeref(SrcVal); - Out << ")"; - } - - // Convert over the clobber constraints. - IsFirst = true; - for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(), - E = Constraints.end(); I != E; ++I) { - if (I->Type != InlineAsm::isClobber) - continue; // Ignore non-input constraints. - - assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle"); - std::string C = InterpretASMConstraint(*I); - if (C.empty()) continue; - - if (!IsFirst) { - Out << ", "; - IsFirst = false; - } - - Out << '\"' << C << '"'; - } - - Out << ")"; -} - -void CWriter::visitAllocaInst(AllocaInst &I) { - Out << '('; - printType(Out, I.getType()); - Out << ") alloca(sizeof("; - printType(Out, I.getType()->getElementType()); - Out << ')'; - if (I.isArrayAllocation()) { - Out << " * " ; - writeOperand(I.getOperand(0)); - } - Out << ')'; -} - -void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, - gep_type_iterator E, bool Static) { - - // If there are no indices, just print out the pointer. - if (I == E) { - writeOperand(Ptr); - return; - } - - // Find out if the last index is into a vector. If so, we have to print this - // specially. Since vectors can't have elements of indexable type, only the - // last index could possibly be of a vector element. - VectorType *LastIndexIsVector = 0; - { - for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI) - LastIndexIsVector = dyn_cast<VectorType>(*TmpI); - } - - Out << "("; - - // If the last index is into a vector, we can't print it as &a[i][j] because - // we can't index into a vector with j in GCC. Instead, emit this as - // (((float*)&a[i])+j) - if (LastIndexIsVector) { - Out << "(("; - printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType())); - Out << ")("; - } - - Out << '&'; - - // If the first index is 0 (very typical) we can do a number of - // simplifications to clean up the code. - Value *FirstOp = I.getOperand(); - if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) { - // First index isn't simple, print it the hard way. - writeOperand(Ptr); - } else { - ++I; // Skip the zero index. - - // Okay, emit the first operand. If Ptr is something that is already address - // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. - if (isAddressExposed(Ptr)) { - writeOperandInternal(Ptr, Static); - } else if (I != E && (*I)->isStructTy()) { - // If we didn't already emit the first operand, see if we can print it as - // P->f instead of "P[0].f" - writeOperand(Ptr); - Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - ++I; // eat the struct index as well. - } else { - // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic. 
- Out << "(*"; - writeOperand(Ptr); - Out << ")"; - } - } - - for (; I != E; ++I) { - if ((*I)->isStructTy()) { - Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if ((*I)->isArrayTy()) { - Out << ".array["; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else if (!(*I)->isVectorTy()) { - Out << '['; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << ']'; - } else { - // If the last index is into a vector, then print it out as "+j)". This - // works with the 'LastIndexIsVector' code above. - if (isa<Constant>(I.getOperand()) && - cast<Constant>(I.getOperand())->isNullValue()) { - Out << "))"; // avoid "+0". - } else { - Out << ")+("; - writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); - Out << "))"; - } - } - } - Out << ")"; -} - -void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType, - bool IsVolatile, unsigned Alignment) { - - bool IsUnaligned = Alignment && - Alignment < TD->getABITypeAlignment(OperandType); - - if (!IsUnaligned) - Out << '*'; - if (IsVolatile || IsUnaligned) { - Out << "(("; - if (IsUnaligned) - Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {"; - printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*"); - if (IsUnaligned) { - Out << "; } "; - if (IsVolatile) Out << "volatile "; - Out << "*"; - } - Out << ")"; - } - - writeOperand(Operand); - - if (IsVolatile || IsUnaligned) { - Out << ')'; - if (IsUnaligned) - Out << "->data"; - } -} - -void CWriter::visitLoadInst(LoadInst &I) { - writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(), - I.getAlignment()); - -} - -void CWriter::visitStoreInst(StoreInst &I) { - writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(), - I.isVolatile(), I.getAlignment()); - Out << " = "; - Value *Operand = I.getOperand(0); - Constant *BitMask = 0; - if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType())) - if (!ITy->isPowerOf2ByteWidth()) - // We have a bit width that doesn't match an even power-of-2 byte - // size. Consequently we must & the value with the type's bit mask - BitMask = ConstantInt::get(ITy, ITy->getBitMask()); - if (BitMask) - Out << "(("; - writeOperand(Operand); - if (BitMask) { - Out << ") & "; - printConstant(BitMask, false); - Out << ")"; - } -} - -void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) { - printGEPExpression(I.getPointerOperand(), gep_type_begin(I), - gep_type_end(I), false); -} - -void CWriter::visitVAArgInst(VAArgInst &I) { - Out << "va_arg(*(va_list*)"; - writeOperand(I.getOperand(0)); - Out << ", "; - printType(Out, I.getType()); - Out << ");\n "; -} - -void CWriter::visitInsertElementInst(InsertElementInst &I) { - Type *EltTy = I.getType()->getElementType(); - writeOperand(I.getOperand(0)); - Out << ";\n "; - Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(&I) << "))["; - writeOperand(I.getOperand(2)); - Out << "] = ("; - writeOperand(I.getOperand(1)); - Out << ")"; -} - -void CWriter::visitExtractElementInst(ExtractElementInst &I) { - // We know that our operand is not inlined. 
- Out << "(("; - Type *EltTy = - cast<VectorType>(I.getOperand(0)->getType())->getElementType(); - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(I.getOperand(0)) << "))["; - writeOperand(I.getOperand(1)); - Out << "]"; -} - -void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) { - Out << "("; - printType(Out, SVI.getType()); - Out << "){ "; - VectorType *VT = SVI.getType(); - unsigned NumElts = VT->getNumElements(); - Type *EltTy = VT->getElementType(); - - for (unsigned i = 0; i != NumElts; ++i) { - if (i) Out << ", "; - int SrcVal = SVI.getMaskValue(i); - if ((unsigned)SrcVal >= NumElts*2) { - Out << " 0/*undef*/ "; - } else { - Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts); - if (isa<Instruction>(Op)) { - // Do an extractelement of this value from the appropriate input. - Out << "(("; - printType(Out, PointerType::getUnqual(EltTy)); - Out << ")(&" << GetValueName(Op) - << "))[" << (SrcVal & (NumElts-1)) << "]"; - } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) { - Out << "0"; - } else { - printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal & - (NumElts-1)), - false); - } - } - } - Out << "}"; -} - -void CWriter::visitInsertValueInst(InsertValueInst &IVI) { - // Start by copying the entire aggregate value into the result variable. - writeOperand(IVI.getOperand(0)); - Out << ";\n "; - - // Then do the insert to update the field. - Out << GetValueName(&IVI); - for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - Out << " = "; - writeOperand(IVI.getOperand(1)); -} - -void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { - Out << "("; - if (isa<UndefValue>(EVI.getOperand(0))) { - Out << "("; - printType(Out, EVI.getType()); - Out << ") 0/*UNDEF*/"; - } else { - Out << GetValueName(EVI.getOperand(0)); - for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end(); - i != e; ++i) { - Type *IndexedTy = - ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), - makeArrayRef(b, i+1)); - if (IndexedTy->isArrayTy()) - Out << ".array[" << *i << "]"; - else - Out << ".field" << *i; - } - } - Out << ")"; -} - -//===----------------------------------------------------------------------===// -// External Interface declaration -//===----------------------------------------------------------------------===// - -bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &o, - CodeGenFileType FileType, - bool DisableVerify) { - if (FileType != TargetMachine::CGFT_AssemblyFile) return true; - - PM.add(createGCLoweringPass()); - PM.add(createLowerInvokePass()); - PM.add(createCFGSimplificationPass()); // clean up after lower invoke. 
- PM.add(new CWriter(o)); - PM.add(createGCInfoDeleter()); - return false; -} diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt deleted file mode 100644 index fa819a4..0000000 --- a/lib/Target/CBackend/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_llvm_target(CBackendCodeGen - CBackend.cpp - ) - -add_subdirectory(TargetInfo) diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h deleted file mode 100644 index 8b2286e..0000000 --- a/lib/Target/CBackend/CTargetMachine.h +++ /dev/null @@ -1,42 +0,0 @@ -//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the TargetMachine that is used by the C backend. -// -//===----------------------------------------------------------------------===// - -#ifndef CTARGETMACHINE_H -#define CTARGETMACHINE_H - -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" - -namespace llvm { - -struct CTargetMachine : public TargetMachine { - CTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : TargetMachine(T, TT, CPU, FS, Options) { } - - virtual bool addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify); - - virtual const TargetData *getTargetData() const { return 0; } -}; - -extern Target TheCBackendTarget; - -} // End llvm namespace - - -#endif diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp deleted file mode 100644 index e8274ff..0000000 --- a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "CTargetMachine.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheCBackendTarget; - -extern "C" void LLVMInitializeCBackendTargetInfo() { - RegisterTarget<> X(TheCBackendTarget, "c", "C backend"); -} - -extern "C" void LLVMInitializeCBackendTargetMC() {} diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt deleted file mode 100644 index 6203616..0000000 --- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMCBackendInfo - CBackendTargetInfo.cpp - ) diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index d8bc743..5913a9c 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMTarget TargetLibraryInfo.cpp TargetLoweringObjectFile.cpp TargetMachine.cpp + TargetMachineC.cpp TargetRegisterInfo.cpp TargetSubtargetInfo.cpp ) diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h index a3717b0..d26449e 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -15,9 +15,7 @@ #define SPUMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheCellSPUTarget; diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 9f9692b..9bc6be7 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -11,10 +11,6 @@ // //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 55b3f72..0623741 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -3158,7 +3158,6 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, //! 
Compute used/known bits for a SPU operand void SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -3224,7 +3223,7 @@ bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, return (V > -(1 << 18) && V < (1 << 18) - 1); } -bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 25c5355..e3db7b2 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -121,7 +121,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp index ebfefe2..eec2d25 100644 --- a/lib/Target/CellSPU/SPUSubtarget.cpp +++ b/lib/Target/CellSPU/SPUSubtarget.cpp @@ -15,7 +15,6 @@ #include "SPU.h" #include "SPURegisterInfo.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/ADT/SmallVector.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 107c6cc..69f0ff8 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -33,8 +33,9 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include <algorithm> -#include <set> +#include <cstdio> #include <map> +#include <set> using namespace llvm; static cl::opt<std::string> @@ -195,6 +196,18 @@ void CppWriter::error(const std::string& msg) { report_fatal_error(msg); } +static inline std::string ftostr(const APFloat& V) { + std::string Buf; + if (&V.getSemantics() == &APFloat::IEEEdouble) { + raw_string_ostream(Buf) << V.convertToDouble(); + return Buf; + } else if (&V.getSemantics() == &APFloat::IEEEsingle) { + raw_string_ostream(Buf) << (double)V.convertToFloat(); + return Buf; + } + return "<unknown format in ftostr>"; // error +} + // printCFP - Print a floating point constant .. very carefully :) // This makes sure that conversion to/from floating yields the same binary // result so that we don't lose precision. 
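The ftostr helper added above leans on a temporary raw_string_ostream flushing into its backing string on destruction, and on convertToDouble/convertToFloat being legal only for the matching semantics (hence the getSemantics checks). A usage sketch against the LLVM APIs of this vintage (hand-written; doubleToText is a hypothetical name and LLVM headers are assumed):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static std::string doubleToText() {
      llvm::APFloat V(1.5);  // double ctor -> IEEEdouble semantics
      std::string Buf;
      if (&V.getSemantics() == &llvm::APFloat::IEEEdouble)
        llvm::raw_string_ostream(Buf) << V.convertToDouble();  // flushed on destruction
      return Buf;  // "1.5"
    }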
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 6c5da72..af9e813 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen HexagonExpandPredSpillCode.cpp HexagonFrameLowering.cpp HexagonHardwareLoops.cpp + HexagonMCInstLower.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp HexagonISelLowering.cpp @@ -27,8 +28,9 @@ add_llvm_target(HexagonCodeGen HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp - ) +) add_subdirectory(TargetInfo) +add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 270c7a7..0808323 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -17,10 +17,14 @@ #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" namespace llvm { class FunctionPass; class TargetMachine; + class MachineInstr; + class MCInst; + class HexagonAsmPrinter; class HexagonTargetMachine; class raw_ostream; @@ -30,13 +34,23 @@ namespace llvm { FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM); FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM); - FunctionPass* createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); - FunctionPass* createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM); + FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); +/* TODO: object output. + MCCodeEmitter *createHexagonMCCodeEmitter(const Target &, + TargetMachine &TM, + MCContext &Ctx); +*/ +/* TODO: assembler input. + TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &); +*/ + void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, + HexagonAsmPrinter &AP); } // end namespace llvm; #define Hexagon_POINTER_SIZE 4 @@ -50,4 +64,10 @@ namespace llvm { // a new stack frame. This takes 8 bytes. #define HEXAGON_LRFP_SIZE 8 +// Normal instruction size (in bytes). +#define HEXAGON_INSTR_SIZE 4 + +// Maximum number of words in a packet (in instructions). +#define HEXAGON_PACKET_SIZE 4 + #endif diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index ab5093d..4a50d16 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -39,10 +39,7 @@ include "HexagonInstrInfo.td" include "HexagonIntrinsics.td" include "HexagonIntrinsicsDerived.td" - -def HexagonInstrInfo : InstrInfo { - // Define how we want to layout our target-specific information field. -} +def HexagonInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// // Hexagon processors supported. @@ -56,6 +53,13 @@ def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>; def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>; def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; +// Hexagon Uses the MC printer for assembler output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def HexagonAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + //===----------------------------------------------------------------------===// // Declare the target which we are implementing //===----------------------------------------------------------------------===// @@ -63,4 +67,6 @@ def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>; def Hexagon : Target { // Pull in Instruction Info: let InstructionSet = HexagonInstrInfo; + + let AssemblyWriters = [HexagonAsmWriter]; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index bf333b7..39bf45d 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -16,25 +16,33 @@ #define DEBUG_TYPE "asm-printer" #include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" #include "HexagonTargetMachine.h" #include "HexagonSubtarget.h" -#include "HexagonMachineFunctionInfo.h" +#include "InstPrinter/HexagonInstPrinter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/Mangler.h" @@ -43,8 +51,8 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" using namespace llvm; @@ -53,163 +61,9 @@ static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); - -namespace { - class HexagonAsmPrinter : public AsmPrinter { - const HexagonSubtarget *Subtarget; - - public: - explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { - Subtarget = &TM.getSubtarget<HexagonSubtarget>(); - } - - virtual const char *getPassName() const { - return "Hexagon Assembly Printer"; - } - - /// printInstruction - This method is automatically generated by tablegen - /// from the instruction set description. This method returns true if the - /// machine instruction was sufficiently described to print it, otherwise it - void printInstruction(const MachineInstr *MI, raw_ostream &O); - virtual void EmitInstruction(const MachineInstr *MI); - - void printOp(const MachineOperand &MO, raw_ostream &O); - - /// printRegister - Print register according to target requirements. 
- /// - void printRegister(const MachineOperand &MO, bool R0AsZero, - raw_ostream &O) { - unsigned RegNo = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); - O << getRegisterName(RegNo); - } - - void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &OS) { - const MachineOperand &MO = MI->getOperand(OpNo); - if (MO.isReg()) { - printRegister(MO, false, OS); - } else if (MO.isImm()) { - OS << MO.getImm(); - } else { - printOp(MO, OS); - } - } - - - bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); - - - void printHexagonImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << value; - } - - - void printHexagonNegImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - int value = MI->getOperand(OpNo).getImm(); - O << -value; - } - - void printHexagonNOneImmOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) const { - O << -1; - } - - void printHexagonMEMriOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << " + #" - << (int) MO2.getImm(); - } - - - void printHexagonFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - const MachineOperand &MO1 = MI->getOperand(OpNo); - const MachineOperand &MO2 = MI->getOperand(OpNo+1); - - O << getRegisterName(MO1.getReg()) - << ", #" - << MO2.getImm(); - } - - void printBranchOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - // Branches can take an immediate operand. This is used by the branch - // selection pass to print $+8, an eight byte displacement from the PC. - if (MI->getOperand(OpNo).isImm()) { - O << "$+" << MI->getOperand(OpNo).getImm()*4; - } else { - printOp(MI->getOperand(OpNo), O); - } - } - - void printCallOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O) { - } - - - void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printHexagonImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - O << "#HI("; - if (MI->getOperand(OpNo).isImm()) { - printHexagonImmOperand(MI, OpNo, O); - } else { - printOp(MI->getOperand(OpNo), O); - } - O << ")"; - } - - void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, - raw_ostream &O); - - void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, - raw_ostream &O); - - void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); - void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); - - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const; - - static const char *getRegisterName(unsigned RegNo); - }; - -} // end of anonymous namespace - -// Include the auto-generated portion of the assembly writer. 
-#include "HexagonGenAsmWriter.inc" - - void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { - - // For basic block level alignment, use falign. + // For basic block level alignment, use ".falign". if (!GV) { OutStreamer.EmitRawText(StringRef("\t.falign")); return; @@ -218,12 +72,19 @@ void HexagonAsmPrinter::EmitAlignment(unsigned NumBits, AsmPrinter::EmitAlignment(NumBits, GV); } -void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { +void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + default: + assert(0 && "<unknown operand type>"); + case MachineOperand::MO_Register: + O << HexagonInstPrinter::getRegisterName(MO.getReg()); + return; case MachineOperand::MO_Immediate: - dbgs() << "printOp() does not handle immediate values\n"; - abort(); - + O << MO.getImm(); + return; case MachineOperand::MO_MachineBasicBlock: O << *MO.getMBB()->getSymbol(); return; @@ -237,20 +98,14 @@ void HexagonAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { case MachineOperand::MO_ExternalSymbol: O << *GetExternalSymbolSymbol(MO.getSymbolName()); return; - case MachineOperand::MO_GlobalAddress: { + case MachineOperand::MO_GlobalAddress: // Computing the address of a global symbol, not calling it. O << *Mang->getSymbol(MO.getGlobal()); printOffset(MO.getOffset(), O); return; } - - default: - O << "<unknown operand type: " << MO.getType() << ">"; - return; - } } - // // isBlockOnlyReachableByFallthrough - We need to override this since the // default AsmPrinter does not print labels for any basic block that @@ -273,7 +128,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS) { + raw_ostream &OS) { // Does this asm operand have a single letter operand modifier? if (ExtraCode && ExtraCode[0]) { if (ExtraCode[1] != 0) return true; // Unknown modifier. @@ -341,154 +196,11 @@ void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI, /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream O(Str); - - const MachineFunction* MF = MI->getParent()->getParent(); - const HexagonMachineFunctionInfo* MFI = - (const HexagonMachineFunctionInfo*) - MF->getInfo<HexagonMachineFunctionInfo>(); - + MCInst MCI; + HexagonLowerToMC(MI, MCI, *this); + OutStreamer.EmitInstruction(MCI); - // Print a brace for the beginning of the packet. 
- if (MFI->isStartPacket(MI)) { - O << "\t{" << '\n'; - } - - DEBUG( O << "// MI = " << *MI << '\n';); - - // Indent - O << "\t"; - - - if (MI->getOpcode() == Hexagon::ENDLOOP0) { - if (MFI->isEndPacket(MI) && MFI->isStartPacket(MI)) { - O << "\t{ nop }"; - } else { - O << "}"; - } - printInstruction(MI, O); - } else if (MI->getOpcode() == Hexagon::MPYI_rin) { - // Handle multipy with -ve constant on Hexagon: - // "$dst =- mpyi($src1, #$src2)" - printOperand(MI, 0, O); - O << " =- mpyi("; - printOperand(MI, 1, O); - O << ", #"; - printHexagonNegImmOperand(MI, 2, O); - O << ")"; - } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_indexed_MEM_V4) { - // - // Handle memw(Rs+u6:2) [+-]= #U5 - // - O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMw_ADDSUBi_MEM_V4) { - // - // Handle memw(Rs+u6:2) [+-]= #U5 - // - O << "\tmemw("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_indexed_MEM_V4) { - // - // Handle memh(Rs+u6:1) [+-]= #U5 - // - O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMh_ADDSUBi_MEM_V4) { - // - // Handle memh(Rs+u6:1) [+-]= #U5 - // - O << "\tmemh("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_indexed_MEM_V4) { - // - // Handle memb(Rs+u6:1) [+-]= #U5 - // - O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::MEMb_ADDSUBi_MEM_V4) { - // - // Handle memb(Rs+u6:1) [+-]= #U5 - // - O << "\tmemb("; printHexagonMEMriOperand(MI, 0, O); O << ") "; - int addend = MI->getOperand(2).getImm(); - if (addend < 0) - O << "-= " << "#" << -addend << '\n'; - else - O << "+= " << "#" << addend << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPbGTri_V4) { - // - // Handle Pd=cmpb.gt(Rs,#s8) - // - O << "\t"; - printRegister(MI->getOperand(0), false, O); - O << " = cmpb.gt("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int val = MI->getOperand(2).getImm() >> 24; - O << "#" << val << ")" << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPhEQri_V4) { - // - // Handle Pd=cmph.eq(Rs,#8) - // - O << "\t"; - printRegister(MI->getOperand(0), false, O); - O << " = cmph.eq("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int val = MI->getOperand(2).getImm(); - assert((((0 <= val) && (val <= 127)) || - ((65408 <= val) && (val <= 65535))) && - "Not in correct range!"); - if (val >= 65408) val -= 65536; - O << "#" << val << ")" << '\n'; - } else if (MI->getOpcode() == Hexagon::CMPhGTri_V4) { - // - // Handle Pd=cmph.gt(Rs,#8) - // - O << "\t"; - printRegister(MI->getOperand(0), false, O); - O << " = cmph.gt("; - printRegister(MI->getOperand(1), false, O); - O << ", "; - int 
val = MI->getOperand(2).getImm() >> 16; - O << "#" << val << ")" << '\n'; - } else { - printInstruction(MI, O); - } - - // Print a brace for the end of the packet. - if (MFI->isEndPacket(MI) && MI->getOpcode() != Hexagon::ENDLOOP0) { - O << "\n\t}" << '\n'; - } - - if (AlignCalls && MI->getDesc().isCall()) { - O << "\n\t.falign" << "\n"; - } - - OutStreamer.EmitRawText(O.str()); return; } @@ -507,7 +219,7 @@ void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI, const MachineOperand &MO1 = MI->getOperand(OpNo); const MachineOperand &MO2 = MI->getOperand(OpNo+1); - O << getRegisterName(MO1.getReg()) + O << HexagonInstPrinter::getRegisterName(MO1.getReg()) << " + #" << MO2.getImm(); } @@ -536,6 +248,31 @@ void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo, O << *GetJTISymbol(MO.getIndex()); } +void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo, + raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) && + "Expecting constant pool index"); + + // Hexagon_TODO: Do we need name mangling? + O << *GetCPISymbol(MO.getIndex()); +} + +static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI) { + if (SyntaxVariant == 0) + return(new HexagonInstPrinter(MAI, MII, MRI)); + else + return NULL; +} + extern "C" void LLVMInitializeHexagonAsmPrinter() { RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget); + + TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget, + createHexagonMCInstPrinter); } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h new file mode 100755 index 0000000..bc2af63 --- /dev/null +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -0,0 +1,165 @@ +//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Hexagon Assembly printer class. 
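The createHexagonMCInstPrinter/RegisterMCInstPrinter pairing above is the generic TargetRegistry hook for textual output. A sketch of the same wiring for a made-up target "Foo" (FooInstPrinter, createFooMCInstPrinter, and TheFooTarget are all hypothetical names; in a real target the generated HexagonGenAsmWriter.inc-style code would replace the stub printInst body):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCInstPrinter.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    namespace {
    class FooInstPrinter : public MCInstPrinter {  // hypothetical printer
    public:
      FooInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                     const MCRegisterInfo &MRI)
        : MCInstPrinter(MAI, MII, MRI) {}
      virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) {
        O << "\t<opcode " << MI->getOpcode() << '>';  // stand-in for generated code
        printAnnotation(O, Annot);
      }
    };
    } // end anonymous namespace

    static MCInstPrinter *createFooMCInstPrinter(const Target &T,
                                                 unsigned SyntaxVariant,
                                                 const MCAsmInfo &MAI,
                                                 const MCInstrInfo &MII,
                                                 const MCRegisterInfo &MRI,
                                                 const MCSubtargetInfo &STI) {
      return SyntaxVariant == 0 ? new FooInstPrinter(MAI, MII, MRI) : 0;
    }

    // In the target's LLVMInitializeFooAsmPrinter():
    //   TargetRegistry::RegisterMCInstPrinter(TheFooTarget, createFooMCInstPrinter);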
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONASMPRINTER_H +#define HEXAGONASMPRINTER_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + class HexagonAsmPrinter : public AsmPrinter { + const HexagonSubtarget *Subtarget; + + public: + explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) { + Subtarget = &TM.getSubtarget<HexagonSubtarget>(); + } + + virtual const char *getPassName() const { + return "Hexagon Assembly Printer"; + } + + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + + virtual void EmitInstruction(const MachineInstr *MI); + virtual void EmitAlignment(unsigned NumBits, + const GlobalValue *GV = 0) const; + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS); + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. This method returns true if the + /// machine instruction was sufficiently described to print it, otherwise it + /// returns false. + void printInstruction(const MachineInstr *MI, raw_ostream &O); + + // void printMachineInstruction(const MachineInstr *MI); + void printOp(const MachineOperand &MO, raw_ostream &O); + + /// printRegister - Print register according to target requirements. + /// + void printRegister(const MachineOperand &MO, bool R0AsZero, + raw_ostream &O) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??"); + O << getRegisterName(RegNo); + } + + void printImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << value; + } + + void printNegImmOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + int value = MI->getOperand(OpNo).getImm(); + O << -value; + } + + void printMEMriOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << " + #" + << (int) MO2.getImm(); + } + + void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); + + O << getRegisterName(MO1.getReg()) + << ", #" + << MO2.getImm(); + } + + void printBranchOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. 
+ if (MI->getOperand(OpNo).isImm()) { + O << "$+" << MI->getOperand(OpNo).getImm()*4; + } else { + printOp(MI->getOperand(OpNo), O); + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + } + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printImmOperand(MI, OpNo, O); + } + else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + O << "#HI("; + if (MI->getOperand(OpNo).isImm()) { + printImmOperand(MI, OpNo, O); + } + else { + printOp(MI->getOperand(OpNo), O); + } + O << ")"; + } + + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O); + +#if 0 + void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O); +#endif + + void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo, + raw_ostream &O); + + void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O); + void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O); + + static const char *getRegisterName(unsigned RegNo); + +#if 0 + void EmitStartOfAsmFile(Module &M); +#endif + }; + +} // end of llvm namespace + +#endif diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td index 18589a2..e78bb79 100644 --- a/lib/Target/Hexagon/HexagonImmediates.td +++ b/lib/Target/Hexagon/HexagonImmediates.td @@ -10,211 +10,211 @@ // From IA64's InstrInfo file def s32Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s16Imm : Operand<i32> { - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s12Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s11_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s10Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s9Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s8Imm : Operand<i32> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s8Imm64 : Operand<i64> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def s4_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u64Imm : Operand<i64> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u32Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u16_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u11_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u10Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u9Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u8Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u7Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_0Imm : Operand<i32> { // For now, we use a generic print function for all operands. 
- let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u6_3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u5Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u4Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u3Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u2Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def u1Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def n8Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def m6Imm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonImmOperand"; + let PrintMethod = "printImmOperand"; } def nOneImm : Operand<i32> { // For now, we use a generic print function for all operands. - let PrintMethod = "printHexagonNOneImmOperand"; + let PrintMethod = "printNOneImmOperand"; } // @@ -494,7 +494,7 @@ def m6ImmPred : PatLeaf<(i32 imm), [{ //InN means negative integers in [-(2^N - 1), 0] def n8ImmPred : PatLeaf<(i32 imm), [{ - // n8ImmPred predicate - True if the immediate fits in a 8-bit unsigned + // n8ImmPred predicate - True if the immediate fits in a 8-bit signed // field. int64_t v = (int64_t)N->getSExtValue(); return (-255 <= v && v <= 0); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3d7ace5..77b3663 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "Hexagon.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/DFAPacketizer.h" diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index f3c6622..b563ac3 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -40,24 +40,24 @@ def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>; // Address operands. 
def MEMrr : Operand<i32> { - let PrintMethod = "printHexagonMEMrrOperand"; + let PrintMethod = "printMEMrrOperand"; let MIOperandInfo = (ops IntRegs, IntRegs); } // Address operands def MEMri : Operand<i32> { - let PrintMethod = "printHexagonMEMriOperand"; + let PrintMethod = "printMEMriOperand"; let MIOperandInfo = (ops IntRegs, IntRegs); } def MEMri_s11_2 : Operand<i32>, ComplexPattern<i32, 2, "SelectMEMriS11_2", []> { - let PrintMethod = "printHexagonMEMriOperand"; + let PrintMethod = "printMEMriOperand"; let MIOperandInfo = (ops IntRegs, s11Imm); } def FrameIndex : Operand<i32> { - let PrintMethod = "printHexagonFrameIndexOperand"; + let PrintMethod = "printFrameIndexOperand"; let MIOperandInfo = (ops IntRegs, s11Imm); } diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp new file mode 100644 index 0000000..fbb331b --- /dev/null +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -0,0 +1,93 @@ +//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower Hexagon MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Target/Mangler.h" + +using namespace llvm; + +static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, + HexagonAsmPrinter& Printer) { + MCContext &MC = Printer.OutContext; + const MCExpr *ME; + + ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC); + + if (!MO.isJTI() && MO.getOffset()) + ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC), + MC); + + return (MCOperand::CreateExpr(ME)); +} + +// Create an MCInst from a MachineInstr +void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI, + HexagonAsmPrinter& AP) { + MCI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { + const MachineOperand &MO = MI->getOperand(i); + MCOperand MCO; + + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCO = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_FPImmediate: { + APFloat Val = MO.getFPImm()->getValueAPF(); + // FP immediates are used only when setting GPRs, so they may be dealt + // with like regular immediates from this point on. 
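The MO_FPImmediate case that continues below reinterprets the float's bits as an integer immediate via APFloat; a standalone demonstration of that conversion (hand-written; assumes LLVM headers):

    #include "llvm/ADT/APFloat.h"
    #include <cstdio>
    #include <stdint.h>

    int main() {
      llvm::APFloat Val(2.5);  // IEEE double 2.5
      uint64_t Bits = *Val.bitcastToAPInt().getRawData();  // raw IEEE-754 words
      std::printf("%016llx\n", (unsigned long long)Bits);  // 4004000000000000
      return 0;
    }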
+ MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData()); + break; + } + case MachineOperand::MO_Immediate: + MCO = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCO = MCOperand::CreateExpr + (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), + AP.OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP); + break; + case MachineOperand::MO_ExternalSymbol: + MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), + AP); + break; + case MachineOperand::MO_JumpTableIndex: + MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; + case MachineOperand::MO_BlockAddress: + MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP); + break; + } + + MCI.addOperand(MCO); + } +} diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt new file mode 100644 index 0000000..cb106a8 --- /dev/null +++ b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMHexagonAsmPrinter + HexagonInstPrinter.cpp + ) + +add_dependencies(LLVMHexagonAsmPrinter HexagonCommonTableGen) diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp new file mode 100644 index 0000000..ef36881 --- /dev/null +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -0,0 +1,170 @@ +//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "Hexagon.h" +#include "HexagonAsmPrinter.h" +#include "HexagonInstPrinter.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdio> + +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "HexagonGenAsmWriter.inc" + +StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { + return MII.getName(Opcode); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} + +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + const char packetPadding[] = " "; + const char startPacket = '{', + endPacket = '}'; + // TODO: add outer HW loop when it's supported too. 
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) { + MCInst Nop; + + O << packetPadding << startPacket << '\n'; + Nop.setOpcode(Hexagon::NOP); + printInstruction(&Nop, O); + O << packetPadding << endPacket; + } + + printInstruction(MI, O); + printAnnotation(O, Annot); +} + +void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if(MO.isExpr()) { + O << *MO.getExpr(); + } else if(MO.isImm()) { + printImmOperand(MI, OpNo, O); + } else { + assert(false && "Unknown operand"); + } +} + +void HexagonInstPrinter::printImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printUnsignedImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + O << -MI->getOperand(OpNo).getImm(); +} + +void HexagonInstPrinter::printNOneImmOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + O << -1; +} + +void HexagonInstPrinter::printMEMriOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()); + O << " + #" << MO1.getImm(); +} + +void HexagonInstPrinter::printFrameIndexOperand + (const MCInst *MI, unsigned OpNo, raw_ostream &O) const { + const MCOperand& MO0 = MI->getOperand(OpNo); + const MCOperand& MO1 = MI->getOperand(OpNo + 1); + + O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm(); +} + +void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + const MCOperand& MO = MI->getOperand(OpNo); + assert(MO.isExpr() && "Expecting expression"); + + printOperand(MI, OpNo, O); +} + +void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { + // Branches can take an immediate operand. This is used by the branch + // selection pass to print $+8, an eight byte displacement from the PC. + assert("Unknown branch operand."); +} + +void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) const { +} + +void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, + raw_ostream &O, bool hi) const { + const MCOperand& MO = MI->getOperand(OpNo); + + O << '#' << (hi? 
"HI": "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + assert("Unknown symbol operand"); + printOperand(MI, OpNo, O); + } + O << ')'; +} diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h new file mode 100644 index 0000000..dad4334 --- /dev/null +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -0,0 +1,73 @@ +//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an Hexagon MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONINSTPRINTER_H +#define HEXAGONINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class HexagonInstPrinter : public MCInstPrinter { + public: + explicit HexagonInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} + + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(const MCInst *MI, raw_ostream &O); + StringRef getRegName(unsigned RegNo) const; + static const char *getRegisterName(unsigned RegNo); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) + const; + void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + + void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; + + void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, true); } + void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const + { printSymbol(MI, OpNo, O, false); } + + bool isConstExtended(const MCInst *MI) const; + protected: + void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) + const; + }; + +} // end namespace llvm + +#endif diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt index 1b47d8e..8678401 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CBackend/TargetInfo/LLVMBuild.txt -----------*- Conf -*--===; +;===- 
./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -17,7 +17,7 @@ [component_0] type = Library -name = CBackendInfo -parent = CBackend -required_libraries = MC Support Target -add_to_library_groups = CBackend +name = HexagonAsmPrinter +parent = Hexagon +required_libraries = MC Support +add_to_library_groups = Hexagon diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/Hexagon/InstPrinter/Makefile index d4d5e15..20331d8 100644 --- a/lib/Target/CBackend/TargetInfo/Makefile +++ b/lib/Target/Hexagon/InstPrinter/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===## +##===- lib/Target/Hexagon/InstPrinter/Makefile ----------------------------===## # # The LLVM Compiler Infrastructure # @@ -7,9 +7,9 @@ # ##===----------------------------------------------------------------------===## LEVEL = ../../../.. -LIBRARYNAME = LLVMCBackendInfo +LIBRARYNAME = LLVMHexagonAsmPrinter -# Hack: we need to include 'main' target directory to grab private headers +# Hack: we need to include 'main' Hexagon target directory to grab private headers CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 84ea6a0..c6d419a 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = TargetInfo MCTargetDesc +subdirectories = InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = HexagonCodeGen parent = Hexagon -required_libraries = AsmPrinter CodeGen Core HexagonInfo SelectionDAG Support Target MC HexagonDesc +required_libraries = AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index b18d23a..2238b1a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -17,7 +17,6 @@ namespace llvm { class MCSubtargetInfo; class Target; -class StringRef; extern Target TheHexagonTarget; diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile index 34bc68d..dc387c5 100644 --- a/lib/Target/Hexagon/Makefile +++ b/lib/Target/Hexagon/Makefile @@ -16,9 +16,8 @@ BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenAsmWriter.inc \ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ HexagonGenCallingConv.inc \ - HexagonGenDFAPacketizer.inc \ - HexagonAsmPrinter.cpp + HexagonGenDFAPacketizer.inc -DIRS = TargetInfo MCTargetDesc +DIRS = InstPrinter TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 5a42ca5..8ec5673 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = ARM CBackend CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore +subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if 
available, or the diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 7105b2e..59a1ed9 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -9,9 +9,6 @@ #include "MCTargetDesc/MBlazeBaseInfo.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallVector.h" - #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index c1b003b..38fb0e8 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -18,9 +18,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" using namespace llvm; diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index 236583a..51ba7c3 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -21,15 +21,15 @@ namespace llvm { class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + MBlazeInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier = 0); diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index c751dd8..55fffe3 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -38,7 +38,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallString.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td index 4962573..00a4219 100644 --- a/lib/Target/MBlaze/MBlazeCallingConv.td +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -9,10 +9,6 @@ // This describes the calling conventions for MBlaze architecture. //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. 
-class CCIfSubtarget<string F, CCAction A>: - CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // MBlaze ABI Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp index 60a65bb..e3c7236 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp @@ -100,8 +100,8 @@ unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const { long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset, unsigned RelOffset, unsigned RelTy) const { - if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL) - return SymOffset - (RelOffset + 4); - - llvm_unreachable("computeRelocation unknown for this relocation type"); + assert((RelTy == ELF::R_MICROBLAZE_32_PCREL || + RelTy == ELF::R_MICROBLAZE_64_PCREL) && + "computeRelocation unknown for this relocation type"); + return SymOffset - (RelOffset + 4); } diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h index 63bfc0d..a314eb7 100644 --- a/lib/Target/MBlaze/MBlazeELFWriterInfo.h +++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetELFWriterInfo.h" namespace llvm { + class TargetMachine; class MBlazeELFWriterInfo : public TargetELFWriterInfo { public: diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp index 6531064..d2f14a5 100644 --- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp +++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp @@ -211,13 +211,13 @@ static void analyzeFrameIndexes(MachineFunction &MF) { static void interruptFrameLayout(MachineFunction &MF) { const Function *F = MF.getFunction(); - llvm::CallingConv::ID CallConv = F->getCallingConv(); + CallingConv::ID CallConv = F->getCallingConv(); // If this function is not using either the interrupt_handler // calling convention or the save_volatiles calling convention // then we don't need to do any additional frame layout. - if (CallConv != llvm::CallingConv::MBLAZE_INTR && - CallConv != llvm::CallingConv::MBLAZE_SVOL) + if (CallConv != CallingConv::MBLAZE_INTR && + CallConv != CallingConv::MBLAZE_SVOL) return; MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -228,7 +228,7 @@ static void interruptFrameLayout(MachineFunction &MF) { // Determine if the calling convention is the interrupt_handler // calling convention. Some pieces of the prologue and epilogue // only need to be emitted if we are lowering an interrupt handler. - bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR; + bool isIntr = CallConv == CallingConv::MBLAZE_INTR; // Determine where to put prologue and epilogue additions MachineBasicBlock &MENT = MF.front(); @@ -347,8 +347,8 @@ void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc DL = MBBI != MBB.end() ?
MBBI->getDebugLoc() : DebugLoc(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; // Determine the correct frame layout determineFrameLayout(MF); @@ -393,8 +393,8 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; // Get the FI's where RA and FP are saved. int FPOffset = MBlazeFI->getFPStackOffset(); @@ -431,8 +431,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); - llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); - bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR; + CallingConv::ID CallConv = MF.getFunction()->getCallingConv(); + bool requiresRA = CallConv == CallingConv::MBLAZE_INTR; if (MFI->adjustsStack() || requiresRA) { MBlazeFI->setRAStackOffset(0); diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 9ef6bb6..edfc335 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -1046,10 +1046,10 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // If this function is using the interrupt_handler calling convention // then use "rtid r14, 0" otherwise use "rtsd r15, 8" - unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet - : MBlazeISD::Ret; - unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14 - : MBlaze::R15; + unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet + : MBlazeISD::Ret; + unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? 
MBlaze::R14 + : MBlaze::R15; SDValue DReg = DAG.getRegister(Reg, MVT::i32); if (Flag.getNode()) diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h index 36bf655..977f9a6 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h @@ -14,7 +14,6 @@ #ifndef MBLAZETARGETASMINFO_H #define MBLAZETARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 5da0aa7..9a7549b 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -95,10 +95,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI, MRI); + return new MBlazeInstPrinter(MAI, MII, MRI); return 0; } diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h index 088d163..ae82c32 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h @@ -25,7 +25,6 @@ class MCObjectWriter; class MCSubtargetInfo; class Target; class StringRef; -class formatted_raw_ostream; class raw_ostream; extern Target TheMBlazeTarget; diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 3fd7ce0..d32eb3a 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -21,8 +21,9 @@ namespace llvm { class MSP430InstPrinter : public MCInstPrinter { public: - MSP430InstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp index 5e5f3d8..2e328cb 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MSP430MCAsmInfo.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; void MSP430MCAsmInfo::anchor() { } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index 690fc19..e5c2fc2 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef MSP430TARGETASMINFO_H #define MSP430TARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class MSP430MCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 8545055..c455f6b 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -61,10 +61,11 @@ static 
MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI, MRI); + return new MSP430InstPrinter(MAI, MII, MRI); return 0; } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h index 35f2590..7f3505c 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h @@ -15,9 +15,7 @@ #define MSP430MCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheMSP430Target; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 53ad155..786a0c5 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,12 +22,13 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C, bool AllowPeriod) { +static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && C != '_' && C != '$' && C != '@' && - !(AllowPeriod && C == '.')) + !(AllowPeriod && C == '.') && + !(AllowUTF8 && (C & 0x80))) return false; return true; } @@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) return true; return false; } @@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, } bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 13d17e4..0500c5d 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -2,12 +2,14 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM MipsGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen @@ -32,6 +34,7 @@ add_llvm_target(MipsCodeGen ) add_subdirectory(InstPrinter) +add_subdirectory(Disassembler) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(AsmParser) diff --git a/lib/Target/Mips/Disassembler/CMakeLists.txt b/lib/Target/Mips/Disassembler/CMakeLists.txt new file mode 100644 index 0000000..fe1dc75 --- /dev/null +++ b/lib/Target/Mips/Disassembler/CMakeLists.txt @@ -0,0 +1,15 @@ 
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMipsDisassembler + MipsDisassembler.cpp + ) + +# workaround for hanging compilation on MSVC8, MSVC9 and MSVC10 +if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) +set_property( + SOURCE MipsDisassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() + +add_dependencies(LLVMMipsDisassembler MipsCommonTableGen) diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt index e64feb0..048ad0d 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/CBackend/LLVMBuild.txt ----------------------*- Conf -*--===; +;===- ./lib/Target/Mips/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,17 +15,9 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = TargetInfo - [component_0] -type = TargetGroup -name = CBackend -parent = Target - -[component_1] type = Library -name = CBackendCodeGen -parent = CBackend -required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils -add_to_library_groups = CBackend +name = MipsDisassembler +parent = Mips +required_libraries = MC Support MipsInfo +add_to_library_groups = Mips diff --git a/lib/Target/CBackend/Makefile b/lib/Target/Mips/Disassembler/Makefile index bac3474..a78feba 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/Mips/Disassembler/Makefile @@ -1,4 +1,4 @@ -##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===## +##===- lib/Target/Mips/Disassembler/Makefile ----------------*- Makefile -*-===## # # The LLVM Compiler Infrastructure # @@ -7,10 +7,10 @@ # ##===----------------------------------------------------------------------===## -LEVEL = ../../.. -LIBRARYNAME = LLVMCBackendCodeGen -DIRS = TargetInfo +LEVEL = ../../../.. +LIBRARYNAME = LLVMMipsDisassembler -include $(LEVEL)/Makefile.common +# Hack: we need to include 'main' Mips target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. -CompileCommonOpts += -Wno-format +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp new file mode 100644 index 0000000..78dbc06 --- /dev/null +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -0,0 +1,552 @@ +//===- MipsDisassembler.cpp - Disassembler for Mips -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the Mips Disassembler. +// +//===----------------------------------------------------------------------===// + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/MathExtras.h" + + +#include "MipsGenEDInfo.inc" + +using namespace llvm; + +typedef MCDisassembler::DecodeStatus DecodeStatus; + +/// MipsDisassembler - a disassembler class for Mips32.
+class MipsDisassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + MipsDisassembler(const MCSubtargetInfo &STI, bool bigEndian) : + MCDisassembler(STI), isBigEndian(bigEndian) { + } + + ~MipsDisassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. + const EDInstInfo *getEDInfo() const; + +private: + bool isBigEndian; +}; + + +/// Mips64Disassembler - a disassembler class for Mips64. +class Mips64Disassembler : public MCDisassembler { +public: + /// Constructor - Initializes the disassembler. + /// + Mips64Disassembler(const MCSubtargetInfo &STI, bool bigEndian) : + MCDisassembler(STI), isBigEndian(bigEndian) { + } + + ~Mips64Disassembler() { + } + + /// getInstruction - See MCDisassembler. + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; + + /// getEDInfo - See MCDisassembler. + const EDInstInfo *getEDInfo() const; + +private: + bool isBigEndian; +}; + +const EDInstInfo *MipsDisassembler::getEDInfo() const { + return instInfoMips; +} + +const EDInstInfo *Mips64Disassembler::getEDInfo() const { + return instInfoMips; +} + +// Decoder tables for Mips registers +static const unsigned CPURegsTable[] = { + Mips::ZERO, Mips::AT, Mips::V0, Mips::V1, + Mips::A0, Mips::A1, Mips::A2, Mips::A3, + Mips::T0, Mips::T1, Mips::T2, Mips::T3, + Mips::T4, Mips::T5, Mips::T6, Mips::T7, + Mips::S0, Mips::S1, Mips::S2, Mips::S3, + Mips::S4, Mips::S5, Mips::S6, Mips::S7, + Mips::T8, Mips::T9, Mips::K0, Mips::K1, + Mips::GP, Mips::SP, Mips::FP, Mips::RA +}; + +static const unsigned FGR32RegsTable[] = { + Mips::F0, Mips::F1, Mips::F2, Mips::F3, + Mips::F4, Mips::F5, Mips::F6, Mips::F7, + Mips::F8, Mips::F9, Mips::F10, Mips::F11, + Mips::F12, Mips::F13, Mips::F14, Mips::F15, + Mips::F16, Mips::F17, Mips::F18, Mips::F19, + Mips::F20, Mips::F21, Mips::F22, Mips::F23, + Mips::F24, Mips::F25, Mips::F26, Mips::F27, + Mips::F28, Mips::F29, Mips::F30, Mips::F31 +}; + +static const unsigned CPU64RegsTable[] = { + Mips::ZERO_64, Mips::AT_64, Mips::V0_64, Mips::V1_64, + Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, + Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64, + Mips::T4_64, Mips::T5_64, Mips::T6_64, Mips::T7_64, + Mips::S0_64, Mips::S1_64, Mips::S2_64, Mips::S3_64, + Mips::S4_64, Mips::S5_64, Mips::S6_64, Mips::S7_64, + Mips::T8_64, Mips::T9_64, Mips::K0_64, Mips::K1_64, + Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 +}; + +static const unsigned FGR64RegsTable[] = { + Mips::D0_64, Mips::D1_64, Mips::D2_64, Mips::D3_64, + Mips::D4_64, Mips::D5_64, Mips::D6_64, Mips::D7_64, + Mips::D8_64, Mips::D9_64, Mips::D10_64, Mips::D11_64, + Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, + Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64, + Mips::D20_64, Mips::D21_64, Mips::D22_64, Mips::D23_64, + Mips::D24_64, Mips::D25_64, Mips::D26_64, Mips::D27_64, + Mips::D28_64, Mips::D29_64, Mips::D30_64, Mips::D31_64 +}; + +static const unsigned AFGR64RegsTable[] = { + Mips::D0, Mips::D1, Mips::D2, Mips::D3, + Mips::D4, Mips::D5, Mips::D6, Mips::D7, + Mips::D8, Mips::D9, Mips::D10, Mips::D11, + Mips::D12, Mips::D13, Mips::D14, Mips::D15 +}; + +// Forward declare these because the autogenerated code will reference them.
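+// Each callback validates the encoded field it is handed and, on success,
+// appends the decoded register or immediate operand to the MCInst.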
+// Definitions are further down. +static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBC1(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeSimm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCondCode(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeInsSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeExtSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +namespace llvm { +extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, + TheMips64elTarget; +} + +static MCDisassembler *createMipsDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new MipsDisassembler(STI,true); +} + +static MCDisassembler *createMipselDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new MipsDisassembler(STI,false); +} + +static MCDisassembler *createMips64Disassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new Mips64Disassembler(STI,true); +} + +static MCDisassembler *createMips64elDisassembler( + const Target &T, + const MCSubtargetInfo &STI) { + return new Mips64Disassembler(STI, false); +} + +extern "C" void LLVMInitializeMipsDisassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheMipsTarget, + createMipsDisassembler); + TargetRegistry::RegisterMCDisassembler(TheMipselTarget, + createMipselDisassembler); + TargetRegistry::RegisterMCDisassembler(TheMips64Target, + createMips64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheMips64elTarget, + createMips64elDisassembler); +} + + +#include "MipsGenDisassemblerTables.inc" + + /// readInstruction - read four bytes from the MemoryObject + /// and return the 32-bit word assembled according to the given endianness +static DecodeStatus readInstruction32(const MemoryObject &region, + uint64_t address, + uint64_t &size, + uint32_t &insn, + bool isBigEndian) { + uint8_t Bytes[4]; + + // We want to read exactly 4 Bytes of data. + if (region.readBytes(address, 4, (uint8_t*)Bytes, NULL) == -1) { + size = 0; + return MCDisassembler::Fail; + } + + if (isBigEndian) { + // Encoded as a big-endian 32-bit word in the stream. + insn = (Bytes[3] << 0) | + (Bytes[2] << 8) | + (Bytes[1] << 16) | + (Bytes[0] << 24); + } + else { + // Encoded as a little-endian 32-bit word in the stream. + insn = (Bytes[0] << 0) | + (Bytes[1] << 8) | + (Bytes[2] << 16) | + (Bytes[3] << 24); + } + + return MCDisassembler::Success; +} + +DecodeStatus +MipsDisassembler::getInstruction(MCInst &instr, + uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { + uint32_t Insn; + + DecodeStatus Result = readInstruction32(Region, Address, Size, + Insn, isBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // Calling the auto-generated decoder function. + Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + + return MCDisassembler::Fail; +} + +DecodeStatus +Mips64Disassembler::getInstruction(MCInst &instr, + uint64_t &Size, + const MemoryObject &Region, + uint64_t Address, + raw_ostream &vStream, + raw_ostream &cStream) const { + uint32_t Insn; + + DecodeStatus Result = readInstruction32(Region, Address, Size, + Insn, isBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + // Calling the auto-generated decoder function.
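+ // decodeMips64Instruction32 is emitted by TableGen from the instruction
+ // records tagged with DecoderNamespace = "Mips64"; the 32-bit tables are
+ // tried below as a fallback.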
+ Result = decodeMips64Instruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + // If we fail to decode in Mips64 decoder space we can try in Mips32 + Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeCPU64RegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(CPU64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCPURegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFGR32RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(FGR32RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCCRRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateReg(RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + int Reg = (int)fieldFromInstruction32(Insn, 16, 5); + int Base = (int)fieldFromInstruction32(Insn, 21, 5); + + if(Inst.getOpcode() == Mips::SC){ + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); + } + + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Reg])); + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeFMem(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + int Reg = (int)fieldFromInstruction32(Insn, 16, 5); + int Base = (int)fieldFromInstruction32(Insn, 21, 5); + + Inst.addOperand(MCOperand::CreateReg(FGR64RegsTable[Reg])); + Inst.addOperand(MCOperand::CreateReg(CPURegsTable[Base])); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + // Currently only hardware register 29 is supported. 
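+ // (Hardware register 29 is the MIPS UserLocal register read via rdhwr,
+ // commonly holding the TLS pointer.)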
+ if (RegNo != 29) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCondCode(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int CondCode = Insn & 0xf; + Inst.addOperand(MCOperand::CreateImm(CondCode)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateReg(AFGR64RegsTable[RegNo])); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + //Currently only hardware register 29 is supported + if (RegNo != 29) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateReg(Mips::HWR29)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBranchTarget(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + unsigned BranchOffset = Offset & 0xffff; + BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBC1(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned BranchOffset = Insn & 0xffff; + BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeJumpTarget(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + + unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2; + Inst.addOperand(MCOperand::CreateImm(JumpOffset)); + return MCDisassembler::Success; +} + + +static DecodeStatus DecodeSimm16(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Insn))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeInsSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + // First we need to grab the pos(lsb) from MCInst. 
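+ // The raw field value is the msb of the inserted bitfield, so the size
+ // operand computed below is msb - lsb + 1.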
+ int Pos = Inst.getOperand(2).getImm(); + int Size = (int) Insn - Pos + 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size))); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeExtSize(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Size = (int) Insn + 1; + Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size))); + return MCDisassembler::Success; +} diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 2917a89..6886b17 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -16,12 +16,12 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "MipsGenAsmWriter.inc" const char* Mips::MipsFCCToString(Mips::CondCode CC) { @@ -62,10 +62,6 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) { llvm_unreachable("Impossible condition code!"); } -StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << '$' << StringRef(getRegisterName(RegNo)).lower(); } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index acd761d..76b839b 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -77,15 +77,14 @@ class TargetMachine; class MipsInstPrinter : public MCInstPrinter { public: - MipsInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) : - MCInstPrinter(MAI, MRI) {} + MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); - static const char *getInstructionName(unsigned Opcode); static const char *getRegisterName(unsigned RegNo); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index abbed8c..a95d6bc 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -24,6 +24,7 @@ name = Mips parent = Target has_asmparser = 1 has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9d5a2f1..9b4caf6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -14,19 +14,13 @@ #include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Object/MachOFormat.h" -#include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -61,7 +55,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: // Get the higher 16-bits. Also add 1 if bit 15 is 1. - Value = (Value >> 16) + ((Value & 0x8000) != 0); + Value = ((Value + 0x8000) >> 16) & 0xffff; break; } @@ -72,13 +66,15 @@ namespace { class MipsAsmBackend : public MCAsmBackend { Triple::OSType OSType; bool IsLittle; // Big or little endian + bool Is64Bit; // 32 or 64 bit words public: - MipsAsmBackend(const Target &T, Triple::OSType _OSType, bool _isLittle) : - MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle) {} + MipsAsmBackend(const Target &T, Triple::OSType _OSType, + bool _isLittle, bool _is64Bit) + :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createMipsELFObjectWriter(OS, OSType, IsLittle); + return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit); } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided @@ -120,7 +116,7 @@ public: } uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize)); - CurVal = (CurVal & ~Mask) | ((CurVal + Value) & Mask); + CurVal |= Value & Mask; // Write out the fixed up bytes back to the code/data bits. 
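+ // (The code emitter leaves the fixup's target bits zero in the encoded
+ // instruction, which is why the plain OR-merge above is sufficient.)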
for (unsigned i = 0; i != NumBytes; ++i) { @@ -212,17 +208,28 @@ public: bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } -}; +}; // class MipsAsmBackend } // namespace // MCAsmBackend -MCAsmBackend *llvm::createMipsAsmBackendEL(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT) { return new MipsAsmBackend(T, Triple(TT).getOS(), - /*IsLittle*/true); + /*IsLittle*/true, /*Is64Bit*/false); } -MCAsmBackend *llvm::createMipsAsmBackendEB(const Target &T, StringRef TT) { +MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT) { return new MipsAsmBackend(T, Triple(TT).getOS(), - /*IsLittle*/false); + /*IsLittle*/false, /*Is64Bit*/false); } + +MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/true, /*Is64Bit*/true); +} + +MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT) { + return new MipsAsmBackend(T, Triple(TT).getOS(), + /*IsLittle*/false, /*Is64Bit*/true); +} + diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index 34e3a6e..fb1c5ce 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -14,7 +14,9 @@ #ifndef MIPSBASEINFO_H #define MIPSBASEINFO_H +#include "MipsFixupKinds.h" #include "MipsMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" @@ -198,6 +200,34 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) default: llvm_unreachable("Unknown register number!"); } } + +inline static std::pair<const MCSymbolRefExpr*, int64_t> +MipsGetSymAndOffset(const MCFixup &Fixup) { + MCFixupKind FixupKind = Fixup.getKind(); + + if ((FixupKind < FirstTargetFixupKind) || + (FixupKind >= MCFixupKind(Mips::LastTargetFixupKind))) + return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0); + + const MCExpr *Expr = Fixup.getValue(); + MCExpr::ExprKind Kind = Expr->getKind(); + + if (Kind == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); + const MCExpr *LHS = BE->getLHS(); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); + + if ((LHS->getKind() != MCExpr::SymbolRef) || !CE) + return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0); + + return std::make_pair(cast<MCSymbolRefExpr>(LHS), CE->getValue()); + } + + if (Kind != MCExpr::SymbolRef) + return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0); + + return std::make_pair(cast<MCSymbolRefExpr>(Expr), 0); +} } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 076a6a8..2091bec 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -7,20 +7,34 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" +#include <list> using namespace llvm; namespace { + struct RelEntry { + RelEntry(const ELFRelocationEntry &R, const MCSymbol *S, int64_t O) : + Reloc(R), Sym(S), Offset(O) {} + 
ELFRelocationEntry Reloc; + const MCSymbol *Sym; + int64_t Offset; + }; + + typedef std::list<RelEntry> RelLs; + typedef RelLs::iterator RelLsIter; + + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(uint8_t OSABI); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI); virtual ~MipsELFObjectWriter(); @@ -33,18 +47,28 @@ namespace { const MCFragment &F, const MCFixup &Fixup, bool IsPCRel) const; + virtual void sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs); }; } -MipsELFObjectWriter::MipsELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MIPS, +MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI) + : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, /*HasRelocationAddend*/ false) {} MipsELFObjectWriter::~MipsELFObjectWriter() {} -// FIXME: get the real EABI Version from the Triple. +// FIXME: get the real EABI Version from the Subtarget class. unsigned MipsELFObjectWriter::getEFlags() const { - return ELF::EF_MIPS_NOREORDER | ELF::EF_MIPS_ARCH_32R2; + + // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static) + unsigned Flag = ELF::EF_MIPS_NOREORDER; + + if (is64Bit()) + Flag |= ELF::EF_MIPS_ARCH_64R2; + else + Flag |= ELF::EF_MIPS_ARCH_32R2; + return Flag; } const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, @@ -129,8 +153,97 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(OSABI); +// Return true if R is either a GOT16 against a local symbol or HI16. +static bool NeedsMatchingLo(const MCAssembler &Asm, const RelEntry &R) { + if (!R.Sym) + return false; + + MCSymbolData &SD = Asm.getSymbolData(R.Sym->AliasedSymbol()); + + return ((R.Reloc.Type == ELF::R_MIPS_GOT16) && !SD.isExternal()) || + (R.Reloc.Type == ELF::R_MIPS_HI16); +} + +static bool HasMatchingLo(const MCAssembler &Asm, RelLsIter I, RelLsIter Last) { + if (I == Last) + return false; + + RelLsIter Hi = I++; + + return (I->Reloc.Type == ELF::R_MIPS_LO16) && (Hi->Sym == I->Sym) && + (Hi->Offset == I->Offset); +} + +static bool HasSameSymbol(const RelEntry &R0, const RelEntry &R1) { + return R0.Sym == R1.Sym; +} + +static int CompareOffset(const RelEntry &R0, const RelEntry &R1) { + return (R0.Offset > R1.Offset) ? 1 : ((R0.Offset == R1.Offset) ? 0 : -1); +} + +void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, + std::vector<ELFRelocationEntry> &Relocs) { + // Call the default function first. Relocations are sorted in descending + // order of r_offset. + MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); + + RelLs RelocLs; + std::vector<RelLsIter> Unmatched; + + // Fill RelocLs. Traverse Relocs backwards so that relocations in RelocLs + // are in ascending order of r_offset. + for (std::vector<ELFRelocationEntry>::reverse_iterator R = Relocs.rbegin(); + R != Relocs.rend(); ++R) { + std::pair<const MCSymbolRefExpr*, int64_t> P = + MipsGetSymAndOffset(*R->Fixup); + RelocLs.push_back(RelEntry(*R, P.first ? &P.first->getSymbol() : 0, + P.second)); + } + + // Get list of unmatched HI16 and GOT16. + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) + if (NeedsMatchingLo(Asm, *R) && !HasMatchingLo(Asm, R, --RelocLs.end())) + Unmatched.push_back(R); + + // Insert unmatched HI16 and GOT16 immediately before their matching LO16.
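+ // The MIPS psABI computes the addend of an R_MIPS_HI16 (and of an
+ // R_MIPS_GOT16 against a local symbol) from the R_MIPS_LO16 entry that
+ // follows it, so each pair has to end up adjacent in the relocation table.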
+ for (std::vector<RelLsIter>::iterator U = Unmatched.begin(); + U != Unmatched.end(); ++U) { + RelLsIter LoPos = RelocLs.end(), HiPos = *U; + bool MatchedLo = false; + + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) { + if ((R->Reloc.Type == ELF::R_MIPS_LO16) && HasSameSymbol(*HiPos, *R) && + (CompareOffset(*R, *HiPos) >= 0) && + ((LoPos == RelocLs.end()) || ((CompareOffset(*R, *LoPos) < 0)) || + (!MatchedLo && !CompareOffset(*R, *LoPos)))) + LoPos = R; + + MatchedLo = NeedsMatchingLo(Asm, *R) && + HasMatchingLo(Asm, R, --RelocLs.end()); + } + + // If a matching LoPos was found, move HiPos and insert it before LoPos. + // Make the offsets of HiPos and LoPos match. + if (LoPos != RelocLs.end()) { + HiPos->Offset = LoPos->Offset; + RelocLs.insert(LoPos, *HiPos); + RelocLs.erase(HiPos); + } + } + + // Put the sorted list back in reverse order. + assert(Relocs.size() == RelocLs.size()); + unsigned I = RelocLs.size(); + + for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) + Relocs[--I] = R->Reloc; +} + +MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, + uint8_t OSABI, + bool IsLittleEndian, + bool Is64Bit) { + MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index ef4c6e2..e1d8789 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef MIPSTARGETASMINFO_H #define MIPSTARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class MipsMCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 9ebb6d2..27954b1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -179,73 +179,71 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } else if (MO.isFPImm()) { return static_cast<unsigned>(APFloat(MO.getFPImm()) .bitcastToAPInt().getHiBits(32).getLimitedValue()); - } else if (MO.isExpr()) { - const MCExpr *Expr = MO.getExpr(); - MCExpr::ExprKind Kind = Expr->getKind(); - unsigned Ret = 0; - - if (Kind == MCExpr::Binary) { - const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); - Expr = BE->getLHS(); - Kind = Expr->getKind(); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); - assert((Kind == MCExpr::SymbolRef) && CE && - "Binary expression must be sym+const."); - Ret = CE->getValue(); - } + } + + // MO must be an Expr. 
+ assert(MO.isExpr()); + + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); - if (Kind == MCExpr::SymbolRef) { - Mips::Fixups FixupKind; - - switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { - case MCSymbolRefExpr::VK_Mips_GPREL: - FixupKind = Mips::fixup_Mips_GPREL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT_CALL: - FixupKind = Mips::fixup_Mips_CALL16; - break; - case MCSymbolRefExpr::VK_Mips_GOT16: - FixupKind = Mips::fixup_Mips_GOT_Global; - break; - case MCSymbolRefExpr::VK_Mips_GOT: - FixupKind = Mips::fixup_Mips_GOT_Local; - break; - case MCSymbolRefExpr::VK_Mips_ABS_HI: - FixupKind = Mips::fixup_Mips_HI16; - break; - case MCSymbolRefExpr::VK_Mips_ABS_LO: - FixupKind = Mips::fixup_Mips_LO16; - break; - case MCSymbolRefExpr::VK_Mips_TLSGD: - FixupKind = Mips::fixup_Mips_TLSGD; - break; - case MCSymbolRefExpr::VK_Mips_TLSLDM: - FixupKind = Mips::fixup_Mips_TLSLDM; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_HI: - FixupKind = Mips::fixup_Mips_DTPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_DTPREL_LO: - FixupKind = Mips::fixup_Mips_DTPREL_LO; - break; - case MCSymbolRefExpr::VK_Mips_GOTTPREL: - FixupKind = Mips::fixup_Mips_GOTTPREL; - break; - case MCSymbolRefExpr::VK_Mips_TPREL_HI: - FixupKind = Mips::fixup_Mips_TPREL_HI; - break; - case MCSymbolRefExpr::VK_Mips_TPREL_LO: - FixupKind = Mips::fixup_Mips_TPREL_LO; - break; - default: - return Ret; - } // switch - Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); - } // if SymbolRef - // All of the information is in the fixup. - return Ret; + if (Kind == MCExpr::Binary) { + Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS(); + Kind = Expr->getKind(); } - llvm_unreachable("Unable to encode MCOperand!"); + + assert (Kind == MCExpr::SymbolRef); + + Mips::Fixups FixupKind; + + switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { + case MCSymbolRefExpr::VK_Mips_GPREL: + FixupKind = Mips::fixup_Mips_GPREL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: + FixupKind = Mips::fixup_Mips_CALL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT16: + FixupKind = Mips::fixup_Mips_GOT_Global; + break; + case MCSymbolRefExpr::VK_Mips_GOT: + FixupKind = Mips::fixup_Mips_GOT_Local; + break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: + FixupKind = Mips::fixup_Mips_HI16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: + FixupKind = Mips::fixup_Mips_LO16; + break; + case MCSymbolRefExpr::VK_Mips_TLSGD: + FixupKind = Mips::fixup_Mips_TLSGD; + break; + case MCSymbolRefExpr::VK_Mips_TLSLDM: + FixupKind = Mips::fixup_Mips_TLSLDM; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_HI: + FixupKind = Mips::fixup_Mips_DTPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_DTPREL_LO: + FixupKind = Mips::fixup_Mips_DTPREL_LO; + break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + FixupKind = Mips::fixup_Mips_GOTTPREL; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + FixupKind = Mips::fixup_Mips_TPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: + FixupKind = Mips::fixup_Mips_TPREL_LO; + break; + default: + break; + } // switch + + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); + + // All of the information is in the fixup. + return 0; } /// getMemEncoding - Return binary encoding of memory related operand. 
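The two Mips hunks above cooperate: getMachineOpValue encodes a symbolic operand as zero and records a fixup kind such as fixup_Mips_HI16, and MipsAsmBackend::adjustFixupValue (earlier in this patch) later folds the resolved address into the instruction, rounding the high half with ((Value + 0x8000) >> 16). The standalone C++ sketch below is an illustration of why that rounding is needed, not part of the patch: the paired %lo immediate is consumed sign-extended, so %hi has to absorb the carry.

#include <cassert>
#include <cstdint>

// hi16/lo16 mirror the fixup_Mips_HI16 / fixup_Mips_LO16 math above.
static uint32_t hi16(uint32_t V) { return ((V + 0x8000) >> 16) & 0xffff; }
static uint32_t lo16(uint32_t V) { return V & 0xffff; }

int main() {
  uint32_t Addr = 0x1000ffff;
  // The low half is used as a sign-extended 16-bit immediate (-1 here), so
  // the high half must round up to 0x1001 for the sum to rebuild Addr.
  int32_t Lo = static_cast<int16_t>(lo16(Addr));
  assert((hi16(Addr) << 16) + Lo == Addr);
  return 0;
}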
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 7652675..f634f08 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -34,6 +34,38 @@ using namespace llvm; +static std::string ParseMipsTriple(StringRef TT, StringRef CPU) { + std::string MipsArchFeature; + size_t DashPosition = 0; + StringRef TheTriple; + + // Let's see if there is a dash, like mips-unknown-linux. + DashPosition = TT.find('-'); + + if (DashPosition == StringRef::npos) { + // No dash, we check the string size. + TheTriple = TT.substr(0); + } else { + // We are only interested in substring before dash. + TheTriple = TT.substr(0,DashPosition); + } + + if (TheTriple == "mips" || TheTriple == "mipsel") { + if (CPU.empty() || CPU == "mips32") { + MipsArchFeature = "+mips32"; + } else if (CPU == "mips32r2") { + MipsArchFeature = "+mips32r2"; + } + } else { + if (CPU.empty() || CPU == "mips64") { + MipsArchFeature = "+mips64"; + } else if (CPU == "mips64r2") { + MipsArchFeature = "+mips64r2"; + } + } + return MipsArchFeature; +} + static MCInstrInfo *createMipsMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitMipsMCInstrInfo(X); @@ -48,8 +80,15 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { + std::string ArchFS = ParseMipsTriple(TT,CPU); + if (!FS.empty()) { + if (!ArchFS.empty()) + ArchFS = ArchFS + "," + FS.str(); + else + ArchFS = FS; + } MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMipsMCSubtargetInfo(X, TT, CPU, FS); + InitMipsMCSubtargetInfo(X, TT, CPU, ArchFS); return X; } @@ -67,7 +106,9 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default) + if (CM == CodeModel::JITDefault) + RM = Reloc::Static; + else if (RM == Reloc::Default) RM = Reloc::PIC_; X->InitMCCodeGenInfo(RM, CM, OL); return X; @@ -76,9 +117,10 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createMipsMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new MipsInstPrinter(MAI, MRI); + return new MipsInstPrinter(MAI, MII, MRI); } static MCStreamer *createMCStreamer(const Target &T, StringRef TT, @@ -142,13 +184,13 @@ extern "C" void LLVMInitializeMipsTargetMC() { // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, - createMipsAsmBackendEB); + createMipsAsmBackendEB32); TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, - createMipsAsmBackendEL); + createMipsAsmBackendEL32); TargetRegistry::RegisterMCAsmBackend(TheMips64Target, - createMipsAsmBackendEB); + createMipsAsmBackendEB64); TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, - createMipsAsmBackendEL); + createMipsAsmBackendEL64); // Register the MC subtarget info. 
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 2e58f9d..547ccdd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -39,12 +39,15 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, MCContext &Ctx); -MCAsmBackend *createMipsAsmBackendEB(const Target &T, StringRef TT); -MCAsmBackend *createMipsAsmBackendEL(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT); +MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT); MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI, - bool IsLittleEndian); + bool IsLittleEndian, + bool Is64Bit); } // End llvm namespace // Defines symbolic names for Mips registers. This defines a mapping from diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 168635c..596f071 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -15,9 +15,9 @@ TARGET = Mips BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc - -DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc + MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ + MipsGenEDInfo.inc MipsGenDisassemblerTables.inc +DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 427e8d9..0382869 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -36,6 +36,7 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; //===----------------------------------------------------------------------===// // Shifts // 64-bit shift instructions. 
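+// DecoderNamespace = "Mips64" routes the encodings that follow into a separate
+// TableGen decoder table; Mips64Disassembler tries that table first and falls
+// back to the 32-bit table (see MipsDisassembler.cpp earlier in this patch).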
+let DecoderNamespace = "Mips64" in { class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm, SDNode OpNode>: shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt, @@ -49,16 +50,21 @@ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>: multiclass Atomic2Ops64<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>; - def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]> { + let isCodeGenOnly = 1; + } } multiclass AtomicCmpSwap64<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>; def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let isCodeGenOnly = 1; + } } - -let usesCustomInserter = 1, Predicates = [HasMips64] in { +} +let usesCustomInserter = 1, Predicates = [HasMips64], + DecoderNamespace = "Mips64" in { defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">; defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">; defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">; @@ -72,7 +78,7 @@ let usesCustomInserter = 1, Predicates = [HasMips64] in { //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// - +let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) def DADDiu : ArithLogicI<0x19, "daddiu", add, simm16_64, immSExt16, CPU64Regs>; @@ -97,16 +103,17 @@ def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>; def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>; def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>; def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>; -def DSLLV : shift_rotate_reg<0x24, 0x00, "dsllv", shl, CPU64Regs>; -def DSRLV : shift_rotate_reg<0x26, 0x00, "dsrlv", srl, CPU64Regs>; -def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>; - +def DSLLV : shift_rotate_reg<0x14, 0x00, "dsllv", shl, CPU64Regs>; +def DSRLV : shift_rotate_reg<0x16, 0x00, "dsrlv", srl, CPU64Regs>; +def DSRAV : shift_rotate_reg<0x17, 0x00, "dsrav", sra, CPU64Regs>; +} // Rotate Instructions -let Predicates = [HasMips64r2] in { +let Predicates = [HasMips64r2], DecoderNamespace = "Mips64" in { def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>; def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>; } +let DecoderNamespace = "Mips64" in { /// Load and Store Instructions /// aligned defm LB64 : LoadM64<0x20, "lb", sextloadi8>; @@ -132,9 +139,13 @@ defm USD : StoreM64<0x3f, "usd", store_u, 1>; /// Load-linked, Store-conditional def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>; -def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>; +def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]> { + let isCodeGenOnly = 1; +} def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; -def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; +def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]> { + let isCodeGenOnly = 1; +} /// Jump and Branch Instructions def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>; @@ -142,11 +153,13 @@ def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; def BGEZ64 : CBranchZero<0x01, 1, "bgez", 
setge, CPU64Regs>; def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>; -def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>; +def BLEZ64 : CBranchZero<0x06, 0, "blez", setle, CPU64Regs>; def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>; - +} +let DecoderNamespace = "Mips64" in def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>; +let DecoderNamespace = "Mips64" in { /// Multiply and Divide Instructions. def DMULT : Mult64<0x1c, "dmult", IIImul>; def DMULTu : Mult64<0x1d, "dmultu", IIImul>; @@ -171,11 +184,13 @@ def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; - -let Uses = [SP_64] in +} +let Uses = [SP_64], DecoderNamespace = "Mips64" in def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, - Requires<[IsN64]>; - + Requires<[IsN64]> { + let isCodeGenOnly = 1; +} +let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; @@ -183,12 +198,12 @@ def DINS : InsBase<7, "dins", CPU64Regs>; def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), "dsll\t$rd, $rt, 32", [], IIAlu>; - def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), "sll\t$rd, $rt, 0", [], IIAlu>; +let isCodeGenOnly = 1 in def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), "sll\t$rd, $rt, 0", [], IIAlu>; - +} //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index f2b842a..8206cfc 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -16,8 +16,6 @@ #include "MipsAsmPrinter.h" #include "Mips.h" #include "MipsInstrInfo.h" -#include "MipsMachineFunction.h" -#include "MipsMCInstLower.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -45,24 +43,23 @@ using namespace llvm; -static bool isUnalignedLoadStore(unsigned Opc) { - return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu || - Opc == Mips::USW || Opc == Mips::USH || - Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 || - Opc == Mips::USW_P8 || Opc == Mips::USH_P8 || - Opc == Mips::ULD || Opc == Mips::ULW64 || Opc == Mips::ULH64 || - Opc == Mips::ULHu64 || Opc == Mips::USD || Opc == Mips::USW64 || - Opc == Mips::USH64 || - Opc == Mips::ULD_P8 || Opc == Mips::ULW64_P8 || - Opc == Mips::ULH64_P8 || Opc == Mips::ULHu64_P8 || - Opc == Mips::USD_P8 || Opc == Mips::USW64_P8 || - Opc == Mips::USH64_P8; +void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) { + MCInst TmpInst; + + MCInstLowering.Lower(MI, TmpInst); + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tat")); + OutStreamer.EmitInstruction(TmpInst); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); } -static bool isDirective(unsigned Opc) { - return Opc == Mips::MACRO || Opc == Mips::NOMACRO || - Opc == Mips::REORDER || Opc == Mips::NOREORDER || - Opc == Mips::ATMACRO || Opc == Mips::NOAT; +bool MipsAsmPrinter::runOnMachineFunction(MachineFunction 
&MF) { + MipsFI = MF.getInfo<MipsFunctionInfo>(); + AsmPrinter::runOnMachineFunction(MF); + return true; } void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -74,49 +71,70 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - MipsMCInstLower MCInstLowering(Mang, *MF, *this); unsigned Opc = MI->getOpcode(); MCInst TmpInst0; SmallVector<MCInst, 4> MCInsts; - MCInstLowering.Lower(MI, TmpInst0); - - if (!OutStreamer.hasRawTextSupport() && isDirective(Opc)) - return; - // Enclose unaligned load or store with .macro & .nomacro directives. - if (isUnalignedLoadStore(Opc)) { + switch (Opc) { + case Mips::ULW: + case Mips::ULH: + case Mips::ULHu: + case Mips::USW: + case Mips::USH: + case Mips::ULW_P8: + case Mips::ULH_P8: + case Mips::ULHu_P8: + case Mips::USW_P8: + case Mips::USH_P8: + case Mips::ULD: + case Mips::ULW64: + case Mips::ULH64: + case Mips::ULHu64: + case Mips::USD: + case Mips::USW64: + case Mips::USH64: + case Mips::ULD_P8: + case Mips::ULW64_P8: + case Mips::ULH64_P8: + case Mips::ULHu64_P8: + case Mips::USD_P8: + case Mips::USW64_P8: + case Mips::USH64_P8: { if (OutStreamer.hasRawTextSupport()) { - MCInst Directive; - Directive.setOpcode(Mips::MACRO); - OutStreamer.EmitInstruction(Directive); - OutStreamer.EmitInstruction(TmpInst0); - Directive.setOpcode(Mips::NOMACRO); - OutStreamer.EmitInstruction(Directive); - } else { - MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I - != MCInsts.end(); ++I) - OutStreamer.EmitInstruction(*I); + EmitInstrWithMacroNoAT(MI); + return; } + + MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); + return; } + case Mips::CPRESTORE: { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); + int64_t Offset = MO.getImm(); - if (!OutStreamer.hasRawTextSupport()) { - // Lower CPLOAD and CPRESTORE - if (Opc == Mips::CPLOAD) - MCInstLowering.LowerCPLOAD(MI, MCInsts); - else if (Opc == Mips::CPRESTORE) - MCInstLowering.LowerCPRESTORE(MI, MCInsts); + if (OutStreamer.hasRawTextSupport()) { + if (!isInt<16>(Offset)) { + EmitInstrWithMacroNoAT(MI); + return; + } + } else { + MCInstLowering.LowerCPRESTORE(Offset, MCInsts); - if (!MCInsts.empty()) { for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); + return; } - } - if (Opc == Mips::SETGP01) { + break; + } + case Mips::SETGP01: { MCInstLowering.LowerSETGP01(MI, MCInsts); for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); @@ -125,7 +143,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + default: + break; + } + MCInstLowering.Lower(MI, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); } @@ -269,13 +291,35 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { /// EmitFunctionBodyStart - Targets can override this to emit stuff before /// the first basic block in the function. 
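The Mips::CPRESTORE case above falls back to the .set macro/.set at wrapper exactly when the offset fails isInt<16>, i.e. when it cannot ride in the signed 16-bit immediate of sw. LLVM's isInt<N> lives in Support/MathExtras.h; a minimal standalone sketch of the same range test (isIntN here is an illustrative reimplementation, not the LLVM symbol):

#include <cassert>
#include <cstdint>

// True iff x is representable as an N-bit two's-complement signed integer.
template <unsigned N>
bool isIntN(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

int main() {
  assert(isIntN<16>(32767));   // sw $gp, 32767($sp) encodes directly
  assert(!isIntN<16>(32768));  // 0x8000 does not fit; needs the lui/addu expansion
  assert(isIntN<16>(-32768));
  return 0;
}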
void MipsAsmPrinter::EmitFunctionBodyStart() { + MCInstLowering.Initialize(Mang, &MF->getContext()); + emitFrameDirective(); + bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) && + Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() && + MipsFI->globalBaseRegFixed(); + if (OutStreamer.hasRawTextSupport()) { SmallString<128> Str; raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); OutStreamer.EmitRawText(OS.str()); + + OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder")); + + // Emit .cpload directive if needed. + if (EmitCPLoad) + OutStreamer.EmitRawText(StringRef("\t.cpload\t$25")); + + OutStreamer.EmitRawText(StringRef("\t.set\tnomacro")); + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tnoat")); + } else if (EmitCPLoad) { + SmallVector<MCInst, 4> MCInsts; + MCInstLowering.LowerCPLOAD(MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); + I != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); } } @@ -286,6 +330,9 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // always be at the function end, and we can't emit and // break with BB logic. if (OutStreamer.hasRawTextSupport()) { + if (MipsFI->getEmitNOAT()) + OutStreamer.EmitRawText(StringRef("\t.set\tat")); + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 473da7e..562bf9c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -14,6 +14,8 @@ #ifndef MIPSASMPRINTER_H #define MIPSASMPRINTER_H +#include "MipsMachineFunction.h" +#include "MipsMCInstLower.h" #include "MipsSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/Support/Compiler.h" @@ -28,12 +30,16 @@ class raw_ostream; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { + void EmitInstrWithMacroNoAT(const MachineInstr *MI); + public: const MipsSubtarget *Subtarget; + const MipsFunctionInfo *MipsFI; + MipsMCInstLower MCInstLowering; explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) { + : AsmPrinter(TM, Streamer), MCInstLowering(*this) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } @@ -41,6 +47,8 @@ public: return "Mips Assembly Printer"; } + virtual bool runOnMachineFunction(MachineFunction &MF); + void EmitInstruction(const MachineInstr *MI); void printSavedRegsBitmask(raw_ostream &O); void printHex32(unsigned int Value, raw_ostream &O); diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 075a3e8..da33680 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -95,45 +95,65 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst, // Instantiation of instructions. 
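The instantiations that follow all reduce to two MIPS primitives: movz writes rs into rd when rt is zero, movn when rt is nonzero; the CondMovIntInt/CondMovIntFP/CondMovFPInt/CondMovFPFP variants below only change the register classes and where the condition comes from. A small C++ model of the core semantics (illustrative only; real operand widths track CPURegs/CPU64Regs/FGR32/FGR64):

#include <cassert>
#include <cstdint>

// movz rd, rs, rt : rd = (rt == 0) ? rs : rd
uint64_t movz(uint64_t rd, uint64_t rs, uint64_t rt) { return rt == 0 ? rs : rd; }
// movn rd, rs, rt : rd = (rt != 0) ? rs : rd
uint64_t movn(uint64_t rd, uint64_t rs, uint64_t rt) { return rt != 0 ? rs : rd; }

int main() {
  assert(movz(1, 2, 0) == 2 && movz(1, 2, 9) == 1);
  assert(movn(1, 2, 0) == 1 && movn(1, 2, 9) == 2);
  return 0;
}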
def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">; -let Predicates = [HasMips64] in { +let Predicates = [HasMips64],DecoderNamespace = "Mips64" in { def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">; - def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz">; - def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz">; + def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> { + let isCodeGenOnly = 1; + } + def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz"> { + let isCodeGenOnly = 1; + } } def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">; -let Predicates = [HasMips64] in { +let Predicates = [HasMips64],DecoderNamespace = "Mips64" in { def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">; - def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn">; - def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn">; + def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> { + let isCodeGenOnly = 1; + } + def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn"> { + let isCodeGenOnly = 1; + } } def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">; def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">; def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} let Predicates = [NotFP64bit] in { def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">; def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit],DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">; - def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d">; + def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> { + let isCodeGenOnly = 1; + } def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">; - def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d">; + def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d"> { + let isCodeGenOnly = 1; + } } def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">; def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">; def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">, - Requires<[HasMips64]>; + Requires<[HasMips64]> { + let DecoderNamespace = "Mips64"; +} def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">; def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">; @@ -142,7 +162,7 @@ let Predicates = [NotFP64bit] in { def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">; def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">; } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index ebfbb4a..f8ea3d0 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -108,9 +108,6 @@ static void 
expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - // FIXME: change this when mips goes MC". - BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); - // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register // operand. @@ -127,7 +124,6 @@ static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64, .addImm(SignExtend64<16>(Inst->ImmOpnd)); BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg); - BuildMI(MBB, II, DL, TII.get(Mips::ATMACRO)); } void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -159,18 +155,22 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Update stack size MFI->setStackSize(StackSize); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - // Emit instructions that set the global base register if the target ABI is // O32. - if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32()) { - if (MipsFI->globalBaseRegFixed()) - BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD)) - .addReg(RegInfo->getPICCallReg()); - else + if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() && + !MipsFI->globalBaseRegFixed()) { // See MipsInstrInfo.td for explanation. - BuildMI(MBB, MBBI, dl, TII.get(Mips:: SETGP01), Mips::V0); + MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock(); + MF.insert(&MBB, NewEntry); + NewEntry->addSuccessor(&MBB); + + // Copy live in registers. + for (MachineBasicBlock::livein_iterator R = MBB.livein_begin(); + R != MBB.livein_end(); ++R) + NewEntry->addLiveIn(*R); + + BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips:: SETGP01), + Mips::V0); } // No need to allocate space on the stack. @@ -183,8 +183,10 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust stack. if (isInt<16>(-StackSize)) // addi sp, sp, (-stacksize) BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - else // Expand immediate that doesn't fit in 16-bit. + else { // Expand immediate that doesn't fit in 16-bit. 
+ MipsFI->setEmitNOAT(); expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl); + } // emit ".cfi_def_cfa_offset StackSize" MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); @@ -254,12 +256,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // Restore GP from the saved stack location if (MipsFI->needGPSaveRestore()) { unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI()); - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset); - - if (Offset >= 0x8000) { - BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO)); - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - } + BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset) + .addReg(Mips::GP); } } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 536879e..f0651c6 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -142,6 +142,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { if (Subtarget.isABI_N64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); // lui $v0, %hi(%neg(%gp_rel(fname))) // daddu $v1, $v0, $t9 @@ -163,6 +164,7 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); } else { MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); if (Subtarget.isABI_N32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index ecde5b6..6a23bc3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -147,6 +147,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (!TM.Options.NoNaNsFPMath) { + setOperationAction(ISD::FABS, MVT::f32, Custom); + setOperationAction(ISD::FABS, MVT::f64, Custom); + } + if (HasMips64) { setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); @@ -208,6 +213,13 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FEXP, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + + if (!TM.Options.NoNaNsFPMath) { + setOperationAction(ISD::FNEG, MVT::f32, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); @@ -732,6 +744,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::FABS: return LowerFABS(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); @@ -1541,7 +1554,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, EVT ValTy = Op.getValueType(); bool HasGotOfst = (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV))); - unsigned GotFlag = IsN64 ? + unsigned GotFlag = HasMips64 ? (HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) : (HasGotOfst ? 
MipsII::MO_GOT : MipsII::MO_GOT16); SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag); @@ -1553,8 +1566,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, if (!HasGotOfst) return ResNode; SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, - IsN64 ? MipsII::MO_GOT_OFST : - MipsII::MO_ABS_LO); + HasMips64 ? MipsII::MO_GOT_OFST : + MipsII::MO_ABS_LO); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo); return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo); } @@ -1575,8 +1588,8 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, } EVT ValTy = Op.getValueType(); - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), BAGOTOffset); @@ -1679,8 +1692,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI); JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO); } else {// Emit Load from Global Pointer - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OfstFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OfstFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag); JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), JTI); @@ -1712,7 +1725,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_ABS_HI); SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), @@ -1722,8 +1735,8 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { EVT ValTy = Op.getValueType(); - unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), N->getOffset(), GOTFlag); CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP); @@ -1754,66 +1767,162 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV), false, false, 0); } -// Called if the size of integer registers is large enough to hold the whole -// floating point number. -static SDValue LowerFCOPYSIGNLargeIntReg(SDValue Op, SelectionDAG &DAG) { - // FIXME: Use ext/ins instructions if target architecture is Mips32r2. 
- EVT ValTy = Op.getValueType(); - EVT IntValTy = MVT::getIntegerVT(ValTy.getSizeInBits()); - uint64_t Mask = (uint64_t)1 << (ValTy.getSizeInBits() - 1); - DebugLoc dl = Op.getDebugLoc(); - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntValTy, Op.getOperand(1)); - SDValue And0 = DAG.getNode(ISD::AND, dl, IntValTy, Op0, - DAG.getConstant(Mask - 1, IntValTy)); - SDValue And1 = DAG.getNode(ISD::AND, dl, IntValTy, Op1, - DAG.getConstant(Mask, IntValTy)); - SDValue Result = DAG.getNode(ISD::OR, dl, IntValTy, And0, And1); - return DAG.getNode(ISD::BITCAST, dl, ValTy, Result); -} - -// Called if the size of integer registers is not large enough to hold the whole -// floating point number (e.g. f64 & 32-bit integer register). -static SDValue -LowerFCOPYSIGNSmallIntReg(SDValue Op, SelectionDAG &DAG, bool isLittle) { - // FIXME: - // Use ext/ins instructions if target architecture is Mips32r2. - // Eliminate redundant mfc1 and mtc1 instructions. - unsigned LoIdx = 0, HiIdx = 1; - - if (!isLittle) - std::swap(LoIdx, HiIdx); +static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) { + EVT TyX = Op.getOperand(0).getValueType(); + EVT TyY = Op.getOperand(1).getValueType(); + SDValue Const1 = DAG.getConstant(1, MVT::i32); + SDValue Const31 = DAG.getConstant(31, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + SDValue Res; + + // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it + // to i32. + SDValue X = (TyX == MVT::f32) ? + DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), + Const1); + SDValue Y = (TyY == MVT::f32) ? + DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1), + Const1); + + if (HasR2) { + // ext E, Y, 31, 1 ; extract bit31 of Y + // ins X, E, 31, 1 ; insert extracted bit at bit31 of X + SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1); + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X); + } else { + // sll SllX, X, 1 + // srl SrlX, SllX, 1 + // srl SrlY, Y, 31 + // sll SllY, SrlX, 31 + // or Or, SrlX, SllY + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); + SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31); + SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31); + Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY); + } - DebugLoc dl = Op.getDebugLoc(); - SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(0), - DAG.getConstant(LoIdx, MVT::i32)); - SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32)); - SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, - Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32)); - SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0, - DAG.getConstant(0x7fffffff, MVT::i32)); - SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1, - DAG.getConstant(0x80000000, MVT::i32)); - SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1); + if (TyX == MVT::f32) + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res); - if (!isLittle) - std::swap(Word0, Word1); + SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Op.getOperand(0), DAG.getConstant(0, MVT::i32)); + return 
DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); +} - return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1); +static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) { + unsigned WidthX = Op.getOperand(0).getValueSizeInBits(); + unsigned WidthY = Op.getOperand(1).getValueSizeInBits(); + EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY); + SDValue Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // Bitcast to integer nodes. + SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0)); + SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1)); + + if (HasR2) { + // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y + // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X + SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y, + DAG.getConstant(WidthY - 1, MVT::i32), Const1); + + if (WidthX > WidthY) + E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E); + else if (WidthY > WidthX) + E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E); + + SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E, + DAG.getConstant(WidthX - 1, MVT::i32), Const1, X); + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I); + } + + // (d)sll SllX, X, 1 + // (d)srl SrlX, SllX, 1 + // (d)srl SrlY, Y, width(Y)-1 + // (d)sll SllY, SrlX, width(Y)-1 + // or Or, SrlX, SllY + SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1); + SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1); + SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y, + DAG.getConstant(WidthY - 1, MVT::i32)); + + if (WidthX > WidthY) + SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY); + else if (WidthY > WidthX) + SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY); + + SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY, + DAG.getConstant(WidthX - 1, MVT::i32)); + SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY); + return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or); } SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { - EVT Ty = Op.getValueType(); + if (Subtarget->hasMips64()) + return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2()); - assert(Ty == MVT::f32 || Ty == MVT::f64); + return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2()); +} - if (Ty == MVT::f32 || HasMips64) - return LowerFCOPYSIGNLargeIntReg(Op, DAG); +static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) { + SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it + // to i32. + SDValue X = (Op.getValueType() == MVT::f32) ? + DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : + DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), + Const1); + + // Clear MSB. 
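Both LowerFCOPYSIGN32 above and the LowerFABS32 body that resumes below lean on the same integer identities: a left shift by one followed by a logical right shift by one clears bit 31 (the sign), and or-ing in the isolated top bit of Y transplants Y's sign onto X; on r2 cores a single ins (of an ext'ed bit, or from $zero for fabs) replaces the shift pair. A self-contained C++ check of the shift-pair arithmetic on a 32-bit float (a sketch of the math only, not of the SelectionDAG code):

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t bits(float f) { uint32_t u; std::memcpy(&u, &f, 4); return u; }
static float flt(uint32_t u) { float f; std::memcpy(&f, &u, 4); return f; }

// fabs: sll by 1 then srl by 1 clears the sign bit.
float fabs32(float x) { return flt((bits(x) << 1) >> 1); }

// copysign: magnitude of x (sll/srl), sign of y (srl 31 / sll 31), or'ed together.
float copysign32(float x, float y) {
  return flt(((bits(x) << 1) >> 1) | ((bits(y) >> 31) << 31));
}

int main() {
  assert(fabs32(-3.5f) == 3.5f && fabs32(3.5f) == 3.5f);
  assert(copysign32(2.0f, -1.0f) == -2.0f);
  assert(copysign32(-2.0f, 1.0f) == 2.0f);
  return 0;
}

This also explains the NoNaNsFPMath guards elsewhere in the patch: the bit-level route touches only the sign bit, whereas abs.s/neg.s are arithmetic FPU instructions on MIPS and are not guaranteed to be mere sign-bit operations on NaN inputs.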
+ if (HasR2) + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, + DAG.getRegister(Mips::ZERO, MVT::i32), + DAG.getConstant(31, MVT::i32), Const1, X); + else { + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); + Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); + } + + if (Op.getValueType() == MVT::f32) + return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res); + + SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, + Op.getOperand(0), DAG.getConstant(0, MVT::i32)); + return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); +} + +static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) { + SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); + DebugLoc DL = Op.getDebugLoc(); + + // Bitcast to integer node. + SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0)); + + // Clear MSB. + if (HasR2) + Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64, + DAG.getRegister(Mips::ZERO_64, MVT::i64), + DAG.getConstant(63, MVT::i32), Const1, X); + else { + SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1); + Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1); + } - return LowerFCOPYSIGNSmallIntReg(Op, DAG, Subtarget->isLittle()); + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res); +} + +SDValue +MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64)) + return LowerFABS64(Op, DAG, Subtarget->hasMips32r2()); + + return LowerFABS32(Op, DAG, Subtarget->hasMips32r2()); } SDValue MipsTargetLowering:: @@ -2545,7 +2654,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, std::vector<SDValue>& OutChains, SelectionDAG &DAG, unsigned NumWords, SDValue FIN, - const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) { + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + const Argument *FuncArg) { unsigned LocMem = VA.getLocMemOffset(); unsigned FirstWord = LocMem / 4; @@ -2560,8 +2670,8 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, DAG.getConstant(i * 4, MVT::i32)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), - StorePtr, MachinePointerInfo(), false, - false, 0); + StorePtr, MachinePointerInfo(FuncArg, i * 4), + false, false, 0); OutChains.push_back(Store); } } @@ -2573,7 +2683,7 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, MachineFrameInfo *MFI, bool IsRegLoc, SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI, - EVT PtrTy) { + EVT PtrTy, const Argument *FuncArg) { const uint16_t *Reg = Mips64IntRegs + 8; int FOOffset; // Frame object offset from virtual frame pointer. @@ -2597,8 +2707,8 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, DAG.getConstant(I * 8, PtrTy)); SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), - StorePtr, MachinePointerInfo(), false, - false, 0); + StorePtr, MachinePointerInfo(FuncArg, I * 8), + false, false, 0); OutChains.push_back(Store); } @@ -2634,9 +2744,11 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); + Function::const_arg_iterator FuncArg = + DAG.getMachineFunction().getFunction()->arg_begin(); int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function. 
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) { CCValAssign &VA = ArgLocs[i]; EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[i].Flags; @@ -2651,11 +2763,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, true); SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); + ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags, + &*FuncArg); } else // N32/64 LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, MFI, IsRegLoc, InVals, MipsFI, - getPointerTy()); + getPointerTy(), &*FuncArg); continue; } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 66f45cd..c36f40f 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -131,6 +131,7 @@ namespace llvm { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index fe5eaec..14d8f1e 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -47,17 +47,17 @@ def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64", SDT_MipsExtractElementF64>; // Operand for printing out a condition code. -let PrintMethod = "printFCCOperand" in +let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in def condcode : Operand<i32>; //===----------------------------------------------------------------------===// // Feature predicates. //===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; +def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; +def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; +def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; +def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; // FP immediate patterns. def fpimm0 : PatLeaf<(fpimm), [{ @@ -83,6 +83,7 @@ def fpimm0neg : PatLeaf<(fpimm), [{ //===----------------------------------------------------------------------===// // FP load. +let DecoderMethod = "DecodeFMem" in { class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs RC:$ft), (ins MemOpnd:$addr), !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))], @@ -93,7 +94,7 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr), !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)], IIStore>; - +} // FP indexed load. 
class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, RegisterClass PRC, PatFrag FOp>: @@ -118,11 +119,13 @@ multiclass FFR1_W_M<bits<6> funct, string opstr> { def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>, Requires<[NotFP64bit]>; def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } // Instructions that convert an FP value to 64-bit fixed point. -let Predicates = [IsFP64bit] in +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in multiclass FFR1_L_M<bits<6> funct, string opstr> { def _S : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>; def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>; @@ -134,7 +137,9 @@ multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> { def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>, Requires<[NotFP64bit]>; def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { @@ -143,9 +148,11 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> { def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>, Requires<[NotFP64bit]>; def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; } } +} // FP madd/msub/nmadd/nmsub instruction classes. class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr, @@ -172,9 +179,11 @@ defm CEIL_L : FFR1_L_M<0xa, "ceil">; defm FLOOR_W : FFR1_W_M<0xf, "floor">; defm FLOOR_L : FFR1_L_M<0xb, "floor">; defm CVT_W : FFR1_W_M<0x24, "cvt">; -defm CVT_L : FFR1_L_M<0x25, "cvt">; +//defm CVT_L : FFR1_L_M<0x25, "cvt">; def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>; +def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>; +def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>; let Predicates = [NotFP64bit] in { def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>; @@ -182,7 +191,7 @@ let Predicates = [NotFP64bit] in { def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>; } -let Predicates = [IsFP64bit] in { +let Predicates = [IsFP64bit], DecoderNamespace = "Mips64" in { def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>; def CVT_S_L : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>; def CVT_D64_W : FFR1<0x21, 20, "cvt", "d.w", FGR64, FGR32>; @@ -190,8 +199,10 @@ let Predicates = [IsFP64bit] in { def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>; } -defm FABS : FFR1P_M<0x5, "abs", fabs>; -defm FNEG : FFR1P_M<0x7, "neg", fneg>; +let Predicates = [NoNaNsFPMath] in { + defm FABS : FFR1P_M<0x5, "abs", fabs>; + defm FNEG : FFR1P_M<0x7, "neg", fneg>; +} defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>; // The odd-numbered registers are only referenced when doing loads, @@ -233,14 +244,20 @@ def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, Requires<[NotFP64bit]>; def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>, - Requires<[IsFP64bit]>; + Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; +} /// Floating Point Memory Instructions -let Predicates = [IsN64] in { +let Predicates = [IsN64], DecoderNamespace = "Mips64" in { def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>; def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>; - def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64>; - def SDC164_P8 : FPStore<0x3d, 
"sdc1", FGR64, mem64>; + def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> { + let isCodeGenOnly =1; + } + def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64> { + let isCodeGenOnly =1; + } } let Predicates = [NotN64] in { @@ -248,7 +265,7 @@ let Predicates = [NotN64] in { def SWC1 : FPStore<0x39, "swc1", FGR32, mem>; } -let Predicates = [NotN64, HasMips64] in { +let Predicates = [NotN64, HasMips64], DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } @@ -271,13 +288,13 @@ let Predicates = [HasMips32r2, NotMips64] in { def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; } -let Predicates = [HasMips64, NotN64] in { +let Predicates = [HasMips64, NotN64], DecoderNamespace="Mips64" in { def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; } // n64 -let Predicates = [IsN64] in { +let Predicates = [IsN64], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; @@ -312,12 +329,12 @@ let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in { def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>; } -let Predicates = [HasMips32r2, IsFP64bit] in { +let Predicates = [HasMips32r2, IsFP64bit], isCodeGenOnly=1 in { def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>; def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>; } -let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath] in { +let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath], isCodeGenOnly=1 in { def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>; def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>; } @@ -340,9 +357,10 @@ let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in let Inst{16} = tf; } +let DecoderMethod = "DecodeBC1" in { def BC1F : FBRANCH<0, 0, MIPS_BRANCH_F, "bc1f">; def BC1T : FBRANCH<0, 1, MIPS_BRANCH_T, "bc1t">; - +} //===----------------------------------------------------------------------===// // Floating Point Flag Conditions //===----------------------------------------------------------------------===// @@ -374,7 +392,9 @@ class FCMP<bits<5> fmt, RegisterClass RC, string typestr> : let Defs=[FCR31] in { def FCMP_S32 : FCMP<0x10, FGR32, "s">; def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>; - def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]>; + def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; + } } //===----------------------------------------------------------------------===// @@ -436,13 +456,13 @@ let Predicates = [IsFP64bit] in { // Patterns for unaligned floating point loads and stores. 
let Predicates = [HasMips32r2Or64, NotN64] in { - def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; + def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; def : Pat<(store_u FGR32:$src, CPURegs:$addr), (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; } let Predicates = [IsN64] in { - def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; + def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 4555303..841eba0 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -45,6 +45,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, let Namespace = "Mips"; + let Size = 4; + bits<6> Opcode = 0; // Top 6 bits are the 'opcode' field @@ -64,6 +66,10 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, // TSFlags layout should be kept in sync with MipsInstrInfo.h. let TSFlags{3-0} = FormBits; + + let DecoderNamespace = "Mips"; + + field bits<32> SoftFail = 0; } // Mips Pseudo Instructions Format diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index bc85fa6..873d2bd 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -121,21 +121,36 @@ def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>; //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">; -def HasBitCount : Predicate<"Subtarget.hasBitCount()">; -def HasSwap : Predicate<"Subtarget.hasSwap()">; -def HasCondMov : Predicate<"Subtarget.hasCondMov()">; -def HasMips32 : Predicate<"Subtarget.hasMips32()">; -def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">; -def HasMips64 : Predicate<"Subtarget.hasMips64()">; -def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">; -def NotMips64 : Predicate<"!Subtarget.hasMips64()">; -def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">; -def IsN64 : Predicate<"Subtarget.isABI_N64()">; -def NotN64 : Predicate<"!Subtarget.isABI_N64()">; -def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">; -def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">; -def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; +def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">, + AssemblerPredicate<"FeatureSEInReg">; +def HasBitCount : Predicate<"Subtarget.hasBitCount()">, + AssemblerPredicate<"FeatureBitCount">; +def HasSwap : Predicate<"Subtarget.hasSwap()">, + AssemblerPredicate<"FeatureSwap">; +def HasCondMov : Predicate<"Subtarget.hasCondMov()">, + AssemblerPredicate<"FeatureCondMov">; +def HasMips32 : Predicate<"Subtarget.hasMips32()">, + AssemblerPredicate<"FeatureMips32">; +def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">, + AssemblerPredicate<"FeatureMips32r2">; +def HasMips64 : Predicate<"Subtarget.hasMips64()">, + AssemblerPredicate<"FeatureMips64">; +def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">, + AssemblerPredicate<"FeatureMips32r2,FeatureMips64">; +def NotMips64 : Predicate<"!Subtarget.hasMips64()">, + AssemblerPredicate<"!FeatureMips64">; +def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, + AssemblerPredicate<"FeatureMips64r2">; +def IsN64 : 
Predicate<"Subtarget.isABI_N64()">, + AssemblerPredicate<"FeatureN64">; +def NotN64 : Predicate<"!Subtarget.isABI_N64()">, + AssemblerPredicate<"!FeatureN64">; +def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, + AssemblerPredicate<"FeatureMips32">; +def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, + AssemblerPredicate<"FeatureMips32">; +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, + AssemblerPredicate<"FeatureMips32">; //===----------------------------------------------------------------------===// // Mips Operand, Complex Patterns and Transformations Definitions. @@ -148,12 +163,15 @@ def jmptarget : Operand<OtherVT> { def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget"; } def calltarget : Operand<iPTR> { let EncoderMethod = "getJumpTargetOpValue"; } def calltarget64: Operand<i64>; -def simm16 : Operand<i32>; +def simm16 : Operand<i32> { + let DecoderMethod= "DecodeSimm16"; +} def simm16_64 : Operand<i64>; def shamt : Operand<i32>; @@ -189,11 +207,13 @@ def mem_ea_64 : Operand<i64> { // size operand of ext instruction def size_ext : Operand<i32> { let EncoderMethod = "getSizeExtEncoding"; + let DecoderMethod = "DecodeExtSize"; } // size operand of ins instruction def size_ins : Operand<i32> { let EncoderMethod = "getSizeInsEncoding"; + let DecoderMethod = "DecodeInsSize"; } // Transformation Function - get the lower 16 bits. @@ -295,6 +315,7 @@ class ArithLogicR<bits<6> op, bits<6> func, string instr_asm, SDNode OpNode, [(set RC:$rd, (OpNode RC:$rs, RC:$rt))], itin> { let shamt = 0; let isCommutable = isComm; + let isReMaterializable = 1; } class ArithOverflowR<bits<6> op, bits<6> func, string instr_asm, @@ -310,7 +331,9 @@ class ArithLogicI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type, RegisterClass RC> : FI<op, (outs RC:$rt), (ins RC:$rs, Od:$imm16), !strconcat(instr_asm, "\t$rt, $rs, $imm16"), - [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu>; + [(set RC:$rt, (OpNode RC:$rs, imm_type:$imm16))], IIAlu> { + let isReMaterializable = 1; +} class ArithOverflowI<bits<6> op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type, RegisterClass RC> : @@ -365,6 +388,8 @@ class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>: FI<op, (outs RC:$rt), (ins Imm:$imm16), !strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> { let rs = 0; + let neverHasSideEffects = 1; + let isReMaterializable = 1; } class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, @@ -372,6 +397,7 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, bits<21> addr; let Inst{25-21} = addr{20-16}; let Inst{15-0} = addr{15-0}; + let DecoderMethod = "DecodeMem"; } // Memory Load/Store @@ -406,7 +432,10 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 64-bit load. 
@@ -415,7 +444,10 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit load. @@ -423,7 +455,10 @@ multiclass LoadUnAlign32<bits<6> op> { def #NAME# : LoadUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; def _P8 : LoadUnAlign<op, CPURegs, mem64>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit store. multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, @@ -431,7 +466,10 @@ multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 64-bit store. @@ -440,7 +478,10 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>, Requires<[NotN64]>; def _P8 : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // 32-bit store. @@ -448,7 +489,10 @@ multiclass StoreUnAlign32<bits<6> op> { def #NAME# : StoreUnAlign<op, CPURegs, mem>, Requires<[NotN64]>; def _P8 : StoreUnAlign<op, CPURegs, mem64>, - Requires<[IsN64]>; + Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + let isCodeGenOnly = 1; + } } // Conditional Branch @@ -498,6 +542,7 @@ class JumpFJ<bits<6> op, string instr_asm>: let isBarrier=1; let hasDelaySlot = 1; let Predicates = [RelocStatic]; + let DecoderMethod = "DecodeJumpTarget"; } // Unconditional branch @@ -528,7 +573,9 @@ let isCall=1, hasDelaySlot=1 in { class JumpLink<bits<6> op, string instr_asm>: FJ<op, (outs), (ins calltarget:$target, variable_ops), !strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)], - IIBranch>; + IIBranch> { + let DecoderMethod = "DecodeJumpTarget"; + } class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: @@ -555,6 +602,7 @@ class Mult<bits<6> func, string instr_asm, InstrItinClass itin, let shamt = 0; let isCommutable = 1; let Defs = DefRegs; + let neverHasSideEffects = 1; } class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>: @@ -582,6 +630,7 @@ class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC, let rt = 0; let shamt = 0; let Uses = UseRegs; + let neverHasSideEffects = 1; } class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, @@ -592,6 +641,7 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let rd = 0; let shamt = 0; let Defs = DefRegs; + let neverHasSideEffects = 1; } class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : @@ -635,6 +685,7 @@ class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>: let rs = 0; let shamt = sa; let Predicates = [HasSwap]; + let neverHasSideEffects = 1; } // Read Hardware @@ -680,7 +731,9 @@ class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, multiclass Atomic2Ops32<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>; - def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : 
Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + } } // Atomic Compare & Swap. @@ -692,7 +745,9 @@ class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, multiclass AtomicCmpSwap32<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>; - def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]>; + def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; + } } class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : @@ -722,24 +777,13 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), [(callseq_end timm:$amt1, timm:$amt2)]>; } -// Some assembly macros need to avoid pseudoinstructions and assembler -// automatic reodering, we should reorder ourselves. -def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>; -def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>; -def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>; -def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; - -// These macros are inserted to prevent GAS from complaining -// when using the AT register. -def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>; -def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; - // When handling PIC code the assembler needs .cpload and .cprestore // directives. If the real instructions corresponding these directives // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. -def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; +let neverHasSideEffects = 1 in +def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; // For O32 ABI & PIC & non-fixed global base register, the following instruction // seqeunce is emitted to set the global base register: @@ -757,7 +801,10 @@ def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; // before or between instructions 0 and 1, which is a limitation imposed by // GNU linker. 
+let isTerminator = 1, isBarrier = 1 in def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>; + +let neverHasSideEffects = 1 in def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "", []>; @@ -871,9 +918,14 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", /// Load-linked, Store-conditional def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>; -def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>; +def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; +} + def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>; -def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; +def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]> { + let DecoderNamespace = "Mips64"; +} /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; @@ -891,7 +943,7 @@ def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>; def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>; def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>; -let isReturn=1, isTerminator=1, hasDelaySlot=1, +let isReturn=1, isTerminator=1, hasDelaySlot=1, isCodeGenOnly=1, isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in def RET : FR <0x00, 0x08, (outs), (ins CPURegs:$target), "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; @@ -926,13 +978,17 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; +def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { + let isCodeGenOnly = 1; +} // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. 
let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; +def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { + let isCodeGenOnly = 1; +} // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index ad3c930..f4c4ae8 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -19,8 +19,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetJITInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" namespace llvm { class MipsTargetMachine; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 0d51298..1597b93 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -26,9 +26,13 @@ using namespace llvm; -MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, - MipsAsmPrinter &asmprinter) - : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {} +MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter) + : AsmPrinter(asmprinter) {} + +void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) { + Mang = M; + Ctx = C; +} MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, @@ -90,7 +94,7 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable("<unknown operand type>"); } - const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx); if (!Offset) return MCOperand::CreateExpr(MCSym); @@ -98,76 +102,68 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, // Assume offset is never negative. assert(Offset > 0); - const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); - const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, Ctx); + const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx); + const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx); return MCOperand::CreateExpr(AddExpr); } +static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0, + const MCOperand& Opnd1, + const MCOperand& Opnd2 = MCOperand()) { + Inst.setOpcode(Opc); + Inst.addOperand(Opnd0); + Inst.addOperand(Opnd1); + if (Opnd2.isValid()) + Inst.addOperand(Opnd2); +} + // Lower ".cpload $reg" to // "lui $gp, %hi(_gp_disp)" // "addiu $gp, $gp, %lo(_gp_disp)" -// "addu $gp. 
$gp, $reg" -void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI, - SmallVector<MCInst, 4>& MCInsts) { - MCInst Lui, Addiu, Addu; +// "addu $gp, $gp, $t9" +void MipsMCInstLower::LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts) { + MCOperand GPReg = MCOperand::CreateReg(Mips::GP); + MCOperand T9Reg = MCOperand::CreateReg(Mips::T9); StringRef SymName("_gp_disp"); - const MCSymbol *Symbol = Ctx.GetOrCreateSymbol(SymName); + const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); const MCSymbolRefExpr *MCSym; - // lui $gp, %hi(_gp_disp) - Lui.setOpcode(Mips::LUi); - Lui.addOperand(MCOperand::CreateReg(Mips::GP)); - MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); - Lui.addOperand(MCOperand::CreateExpr(MCSym)); - MCInsts.push_back(Lui); - - // addiu $gp, $gp, %lo(_gp_disp) - Addiu.setOpcode(Mips::ADDiu); - Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); - Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); - MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); - Addiu.addOperand(MCOperand::CreateExpr(MCSym)); - MCInsts.push_back(Addiu); - - // addu $gp. $gp, $reg - Addu.setOpcode(Mips::ADDu); - Addu.addOperand(MCOperand::CreateReg(Mips::GP)); - Addu.addOperand(MCOperand::CreateReg(Mips::GP)); - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isReg() && "CPLOAD's operand must be a register."); - Addu.addOperand(MCOperand::CreateReg(MO.getReg())); - MCInsts.push_back(Addu); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); + MCOperand SymHi = MCOperand::CreateExpr(MCSym); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); + MCOperand SymLo = MCOperand::CreateExpr(MCSym); + + MCInsts.resize(3); + + CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi); + CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo); + CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg); } // Lower ".cprestore offset" to "sw $gp, offset($sp)". 
-void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, +void MipsMCInstLower::LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts) { - const MachineOperand &MO = MI->getOperand(0); - assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); - unsigned Offset = MO.getImm(), Reg = Mips::SP; - MCInst Sw; + assert(isInt<32>(Offset) && (Offset >= 0) && + "Imm operand of .cprestore must be a non-negative 32-bit value."); - if (Offset >= 0x8000) { - unsigned Hi = (Offset >> 16) + ((Offset & 0x8000) != 0); + MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg; + MCOperand GPReg = MCOperand::CreateReg(Mips::GP); + + if (!isInt<16>(Offset)) { + unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff; Offset &= 0xffff; - Reg = Mips::AT; + MCOperand ATReg = MCOperand::CreateReg(Mips::AT); + BaseReg = ATReg; // lui at,hi // addu at,at,sp MCInsts.resize(2); - MCInsts[0].setOpcode(Mips::LUi); - MCInsts[0].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[0].addOperand(MCOperand::CreateImm(Hi)); - MCInsts[1].setOpcode(Mips::ADDu); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::AT)); - MCInsts[1].addOperand(MCOperand::CreateReg(Mips::SP)); + CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi)); + CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg); } - Sw.setOpcode(Mips::SW); - Sw.addOperand(MCOperand::CreateReg(Mips::GP)); - Sw.addOperand(MCOperand::CreateReg(Reg)); - Sw.addOperand(MCOperand::CreateImm(Offset)); + MCInst Sw; + CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset)); MCInsts.push_back(Sw); } @@ -332,18 +328,16 @@ void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI, assert(MO.isReg()); MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg()); StringRef SymName("_gp_disp"); - const MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); const MCSymbolRefExpr *MCSym; + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx); + MCOperand SymHi = MCOperand::CreateExpr(MCSym); + MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx); + MCOperand SymLo = MCOperand::CreateExpr(MCSym); + MCInsts.resize(2); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); - MCInsts[0].setOpcode(Mips::LUi); - MCInsts[0].addOperand(RegOpnd); - MCInsts[0].addOperand(MCOperand::CreateExpr(MCSym)); - MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); - MCInsts[1].setOpcode(Mips::ADDiu); - MCInsts[1].addOperand(RegOpnd); - MCInsts[1].addOperand(RegOpnd); - MCInsts[1].addOperand(MCOperand::CreateExpr(MCSym)); + CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi); + CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo); } diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 20bb338..c1d007d 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -26,15 +26,15 @@ namespace llvm { // MCInst. 
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower { typedef MachineOperand::MachineOperandType MachineOperandType; - MCContext &Ctx; + MCContext *Ctx; Mangler *Mang; MipsAsmPrinter &AsmPrinter; public: - MipsMCInstLower(Mangler *mang, const MachineFunction &MF, - MipsAsmPrinter &asmprinter); + MipsMCInstLower(MipsAsmPrinter &asmprinter); + void Initialize(Mangler *mang, MCContext* C); void Lower(const MachineInstr *MI, MCInst &OutMI) const; - void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); - void LowerCPRESTORE(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); + void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts); + void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts); void LowerUnalignedLoadStore(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index abb5404..0fde55c 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -14,7 +14,6 @@ #ifndef MIPS_MACHINE_FUNCTION_INFO_H #define MIPS_MACHINE_FUNCTION_INFO_H -#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include <utility> @@ -50,12 +49,14 @@ class MipsFunctionInfo : public MachineFunctionInfo { mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; + bool EmitNOAT; + public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0) + MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -100,6 +101,9 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } + + bool getEmitNOAT() const { return EmitNOAT; } + void setEmitNOAT() { EmitNOAT = true; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5cfda34..f30de44 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -62,7 +62,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const return CSR_O32_SaveList; else if (Subtarget.isABI_N32()) return CSR_N32_SaveList; - + assert(Subtarget.isABI_N64()); return CSR_N64_SaveList; } @@ -125,9 +125,18 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::GP_64); } + // Reserve hardware registers. + Reserved.set(Mips::HWR29); + Reserved.set(Mips::HWR29_64); + return Reserved; } +bool +MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + return true; +} + // This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsRegisterInfo:: @@ -223,8 +232,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */); MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - // FIXME: change this when mips goes MC". 
- BuildMI(MBB, II, DL, TII.get(Mips::NOAT)); + MipsFI->setEmitNOAT(); // The first instruction can be a LUi, which is different from other // instructions (ADDiu, ORI and SLL) in that it does not have a register @@ -245,7 +253,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, FrameReg = ATReg; Offset = SignExtend64<16>(Inst->ImmOpnd); - BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); } MI.getOperand(i).ChangeToRegister(FrameReg, false); diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 7037ca6..0716d29 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -47,6 +47,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { BitVector getReservedRegs(const MachineFunction &MF) const; + virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index d4a50ee..00347df 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -13,6 +13,7 @@ #include "MipsSubtarget.h" #include "Mips.h" +#include "MipsRegisterInfo.h" #include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC @@ -54,3 +55,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, if (TT.find("linux") == std::string::npos) IsLinux = false; } + +bool +MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(hasMips64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass); + return OptLevel >= CodeGenOpt::Aggressive; +} diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ba0bbac..7faf77b 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: InstrItineraryData InstrItins; public: + virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; /// Only O32 and EABI supported right now. bool isABI_EABI() const { return MipsABI == EABI; } diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index ec7e2a7..1830213 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -18,26 +18,24 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PTXGenAsmWriter.inc" PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) : - MCInstPrinter(MAI, MRI) { + MCInstPrinter(MAI, MII, MRI) { // Initialize the set of available features. 
setAvailableFeatures(STI.getFeatureBits()); } -StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { // Decode the register number into type and offset unsigned RegSpace = RegNo & 0x7; diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h index eef6101..ea4d504 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -23,15 +23,12 @@ class MCOperand; class PTXInstPrinter : public MCInstPrinter { public: - PTXInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI); + PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - static const char *getInstructionName(unsigned Opcode); - // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 7671b11..08fb970 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -62,10 +62,11 @@ static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, static MCInstPrinter *createPTXMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { assert(SyntaxVariant == 0 && "We only have one syntax variant"); - return new PTXInstPrinter(MAI, MRI, STI); + return new PTXInstPrinter(MAI, MII, MRI, STI); } extern "C" void LLVMInitializePTXTargetMC() { diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h index 1003b0b..542638a 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -15,9 +15,7 @@ #define PTXMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target ThePTX32Target; extern Target ThePTX64Target; diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index db1c953..ef4455b 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -97,7 +97,8 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) // customise setcc to use bitwise logic if possible - setOperationAction(ISD::SETCC, MVT::i1, Custom); + //setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::SETCC, MVT::i1, Legal); // customize translation of memory addresses @@ -156,18 +157,27 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic - if (Op1.getOpcode() == ISD::Constant && - (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || - cast<ConstantSDNode>(Op1)->isNullValue()) && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { + //if 
(Op1.getOpcode() == ISD::Constant && + // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + // cast<ConstantSDNode>(Op1)->isNullValue()) && + // (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // + // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + //} - return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); - } + //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1); + //if(COp1 && COp1->getZExtValue() == 1) { + // if(CC == ISD::SETNE) { + // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0); + // } + //} + + llvm_unreachable("setcc was not matched by a pattern!"); return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); } @@ -384,22 +394,22 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>(); PTXParamManager &PM = PTXMFI->getParamManager(); MachineFrameInfo *MFI = MF.getFrameInfo(); - + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && "Calls are not handled for the target device"); // Identify the callee function const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); const Function *function = cast<Function>(GV); - + // allow non-device calls only for printf - bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; - + bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; + assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && "PTX function calls must be to PTX device functions"); - + unsigned outSize = isPrintf ? 2 : Outs.size(); - + std::vector<SDValue> Ops; // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] Ops.resize(outSize + Ins.size() + 4); @@ -412,7 +422,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // #Outs Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); - + if (isPrintf) { // first argument is the address of the global string variable in memory unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); @@ -421,29 +431,29 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue0, OutVals[0]); Ops[Ins.size()+4] = ParamValue0; - + // alignment is the maximum size of all the arguments unsigned alignment = 0; for (unsigned i = 1; i < OutVals.size(); ++i) { - alignment = std::max(alignment, + alignment = std::max(alignment, OutVals[i].getValueType().getSizeInBits()); } // size is the alignment multiplied by the number of arguments unsigned size = alignment * (OutVals.size() - 1); - + // second argument is the address of the stack object (unless no arguments) unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), MVT::Other); Ops[Ins.size()+5] = ParamValue1; - + if (size > 0) { // create a local stack object to store the arguments unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); - + // store each of the arguments to the stack in turn for (unsigned int i = 1; i != OutVals.size(); i++) { SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); @@ -475,7 +485,7 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops[i+Ins.size()+4] = ParamValue; } } - + std::vector<SDValue> InParams; // Generate list of .param variables to hold the return value(s). 
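The printf path in the LowerCall hunk above packs the variadic arguments into a single local param buffer: its alignment is the largest argument size, its size is that alignment times the number of variadic arguments, and each argument is stored at an 8-byte stride. A standalone sketch of that layout arithmetic (the argument bit-widths and names are illustrative, not from the patch):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Bit widths of the variadic arguments, i.e. OutVals[1..N-1] above.
      std::vector<unsigned> ArgBits = {32, 64, 32};

      unsigned Alignment = 0;   // maximum size of all the arguments
      for (unsigned Bits : ArgBits)
        Alignment = std::max(Alignment, Bits);

      // Size is the alignment multiplied by the number of arguments.
      unsigned SizeBits = Alignment * ArgBits.size();
      std::printf("buffer: %u bytes, %u-byte aligned\n",
                  SizeBits / 8, Alignment / 8);
      // Each argument is stored to the stack object in turn, 8 bytes apart.
      for (unsigned I = 0; I != ArgBits.size(); ++I)
        std::printf("arg %u at byte offset %u\n", I + 1, I * 8);
      return 0;
    }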
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 818d444..bead428 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -808,6 +808,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1 in { // FIXME: The pattern part is blank because I cannot (or do not yet know // how to) use the first operand of PredicateOperand (a RegPred register) here + // When this is revisited, make sure to also look at LowerSETCC and try to + // fold it into negated predicates, if possible. def BRAdp : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [/*(brcond pred:$_p, bb:$d)*/]>; @@ -1017,6 +1019,9 @@ def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>; def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>; def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>; +// setcc - predicate inversion for branch conditions +def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)), + (XORripreds RegPred:$a, imm:$b)>; ///===- Intrinsic Instructions --------------------------------------------===// include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 40835d0..c55a658 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -17,7 +17,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -153,10 +152,10 @@ bool PTXPassConfig::addPostRegAlloc() { /// Add passes that optimize machine instructions after register allocation. void PTXPassConfig::addMachineLateOptimization() { if (addPass(BranchFolderPassID) != &NoPassID) - printNoVerify("After BranchFolding"); + printAndVerify("After BranchFolding"); if (addPass(TailDuplicateID) != &NoPassID) - printNoVerify("After TailDuplicate"); + printAndVerify("After TailDuplicate"); } bool PTXPassConfig::addPreEmitPass() { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 000d6d4..61d23ce 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -17,16 +17,12 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -#define GET_INSTRUCTION_NAME #include "PPCGenAsmWriter.inc" -StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { OS << getRegisterName(RegNo); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 21fc733..73fd534 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -24,9 +24,9 @@ class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. 
unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, - unsigned syntaxVariant) - : MCInstPrinter(MAI, MRI), SyntaxVariant(syntaxVariant) {} + PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, unsigned syntaxVariant) + : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { return SyntaxVariant == 1; @@ -34,9 +34,6 @@ public: virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; - - static const char *getInstructionName(unsigned Opcode); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9c6eefe..48de583 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCObjectWriter.h" diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 226fbfe..6568e82 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -108,9 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createPPCMCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { - return new PPCInstPrinter(MAI, MRI, SyntaxVariant); + return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant); } extern "C" void LLVMInitializePowerPCTargetMC() { diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 724374c..c554d39 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -34,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">; def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">; def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">; def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">; +def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -87,6 +88,10 @@ def : Processor<"g5", G5Itineraries, [Directive970, FeatureAltivec, FeatureGPUL, FeatureFSqrt, FeatureSTFIWX, Feature64Bit /*, Feature64BitRegs */]>; +def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, + FeatureFSqrt, FeatureSTFIWX, + Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 4abb469..fb7aa71 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -450,6 +450,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "ppc7400", "ppc750", "ppc970", + "ppcA2", "ppc64" }; diff --git a/lib/Target/PowerPC/PPCCallingConv.td 
b/lib/Target/PowerPC/PPCCallingConv.td index 9883c2e..b2b5364 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -12,10 +12,6 @@ // //===----------------------------------------------------------------------===// -/// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget<string F, CCAction A> - : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; - //===----------------------------------------------------------------------===// // Return Value Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index ae317af..6ed1fb9 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -22,17 +22,29 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// PowerPC 440 Hazard Recognizer -void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { +// PowerPC Scoreboard Hazard Recognizer +void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (!MCID) { + if (!MCID) // This is a PPC pseudo-instruction. return; - } ScoreboardHazardRecognizer::EmitInstruction(SU); } +ScheduleHazardRecognizer::HazardType +PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void PPCScoreboardHazardRecognizer::AdvanceCycle() { + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCScoreboardHazardRecognizer::Reset() { + ScoreboardHazardRecognizer::Reset(); +} + //===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // @@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { - LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) { return NoHazard; unsigned Opcode = MI->getOpcode(); - - // If the last instruction was a BL8_ELF, then the NOP must follow it - // directly (this is strong requirement from the linker due to the ELF ABI). - // We return only Hazard (and not NoopHazard) because if the NOP is necessary - // then it will already be in the instruction stream (it is not always - // necessary; tail calls, for example, do not need it). 
- if (LastWasBL8_ELF && Opcode != PPC::NOP) - return Hazard; - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { return; unsigned Opcode = MI->getOpcode(); - LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() { } void PPCHazardRecognizer970::Reset() { - LastWasBL8_ELF = false; EndDispatchGroup(); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index d80a385..55b45d0 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,16 +21,19 @@ namespace llvm { -/// PPCHazardRecognizer440 - This class implements a scoreboard-based -/// hazard recognizer for the PPC 440 and friends. -class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer { +/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for generic PPC processors. +class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; public: - PPCHazardRecognizer440(const InstrItineraryData *ItinData, + PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void Reset(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; - // Was the last instruction issued a BL8_ELF - bool LastWasBL8_ELF; - // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6651d14..5a04888 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -377,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { DebugLoc dl = N->getDebugLoc(); APInt LKZ, LKO, RKZ, RKO; - CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO); - CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO); + CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); + CurDAG->ComputeMaskedBits(Op1, RKZ, RKO); unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 85b5bc1..3b24951 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -226,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the 32-bit SVR4 ABI. 
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI() - && !TM.getSubtarget<PPCSubtarget>().isPPC64()) { - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i64, Custom); + if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) { + if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { + // VAARG always uses double-word chunks, so promote anything smaller. + setOperationAction(ISD::VAARG, MVT::i1, Promote); + AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i8, Promote); + AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i16, Promote); + AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64); + setOperationAction(ISD::VAARG, MVT::i32, Promote); + AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + } else { + // VAARG is custom lowered with the 32-bit SVR4 ABI. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::i64, Custom); + } } else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -377,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } + if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) + setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); @@ -431,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; - // FIXME SVR4 TBD + + // 16byte and wider vectors are passed on 16byte boundary. + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) + if (VTy->getBitWidth() >= 128) + return 16; + + // The rest is 8 on PPC64 and 4 on PPC32 boundary. + if (PPCSubTarget.isPPC64()) + return 8; + return 4; } @@ -460,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -835,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { DAG.ComputeMaskedBits(N.getOperand(1), - APInt::getAllOnesValue(N.getOperand(1) - .getValueSizeInBits()), RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -897,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. 
APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -1013,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - APInt::getAllOnesValue(N.getOperand(0) - .getValueSizeInBits()), - LHSKnownZero, LHSKnownOne); + DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. @@ -2801,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub @@ -2812,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + + bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2824,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); - InFlag = Chain.getValue(1); - } else { + needsTOCRestore = true; + } else if (CallOpc == PPCISD::CALL_SVR4) { // Otherwise insert NOP. 
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); + CallOpc = PPCISD::CALL_NOP_SVR4; } } + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + if (needsTOCRestore) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); @@ -5486,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, //===----------------------------------------------------------------------===// void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { @@ -5725,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{ return (V > -(1 << 16) && V < (1 << 16)-1); } -bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { +bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { return false; } @@ -5818,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::i32; } } + +Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { + unsigned Directive = PPCSubTarget.getDarwinDirective(); + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) + return Sched::ILP; + + return TargetLowering::getSchedulingPreference(N); +} + diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2e046c4..18eb072 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -95,7 +95,9 @@ namespace llvm { EXTSW_32, /// CALL - A direct function call. - CALL_Darwin, CALL_SVR4, + /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// SVR4 calls. + CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, @@ -279,6 +281,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; + Sched::Preference getSchedulingPreference(SDNode *N) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// @@ -293,7 +296,6 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 78f3596..7f67a41 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", BrB, []>; // See Pat patterns below. 
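The BL8_NOP_ELF and BLA8_NOP_ELF forms added below join the call and its trailing nop into one 8-byte pseudo-instruction (GetInstSizeInBytes returns 8 for them later in this patch), implementing the TOC-restore scheme described in the FinishCall hunk above. A sketch of the emitted sequence; the ld offset is the conventional 64-bit SVR4 TOC save slot, not something this patch spells out:

    // bl  callee   <- first word of BL8_NOP_ELF
    // nop          <- second word; for a cross-module call the linker
    //                 rewrites it, conventionally to "ld 2, 40(1)", to
    //                 reload the caller's TOC pointer from its save slot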
+ + let isCodeGenOnly = 1 in + def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, variable_ops), + "bl $func\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins aaddr:$func, variable_ops), + "bla $func\n\tnop", BrB, + [(PPCcall_nop_SVR4 (i64 imm:$func))]>; } let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, @@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), + (BL8_NOP_ELF tglobaladdr:$dst)>; + def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), + (BL8_NOP_ELF texternalsym:$dst)>; + def : Pat<(PPCnop), (NOP)>; @@ -506,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), let mayLoad = 1 in def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStGeneral, + "lhau $rD, $disp($rA)", LdStLoad, []>, RegConstraint<"$rA = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! @@ -516,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; } @@ -595,24 +613,24 @@ def : Pat<(PPCload xaddr:$src), let PPC970_Unit = 2 in { // Truncating stores. 
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 G8RC:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 G8RC:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(truncstorei32 G8RC:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. @@ -629,14 +647,14 @@ let PPC970_Unit = 2 in { def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 707fa41..6c0f3d3 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), - "dss $STRM", LdStGeneral /*FIXME*/, []>; + "dss $STRM", LdStLoad /*FIXME*/, []>; def DSSALL : DSS_Form<822, (outs), (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2), - "dssall", LdStGeneral /*FIXME*/, []>; + "dssall", LdStLoad /*FIXME*/, []>; def DST : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST : DSS_Form<374, (outs), (ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DST64 : DSS_Form<342, (outs), (ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTT64 : DSS_Form<342, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTST64 : DSS_Form<374, (outs), (ins 
u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), - "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>; + "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), - "mfvscr $vD", LdStGeneral, + "mfvscr $vD", LdStStore, [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), - "mtvscr $vB", LdStGeneral, + "mtvscr $vB", LdStLoad, [(int_ppc_altivec_mtvscr VRRC:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), - "lvebx $vD, $src", LdStGeneral, + "lvebx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), - "lvehx $vD, $src", LdStGeneral, + "lvehx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), - "lvewx $vD, $src", LdStGeneral, + "lvewx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), - "lvx $vD, $src", LdStGeneral, + "lvx $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), - "lvxl $vD, $src", LdStGeneral, + "lvxl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), - "lvsl $vD, $src", LdStGeneral, + "lvsl $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), - "lvsr $vD, $src", LdStGeneral, + "lvsr $vD, $src", LdStLoad, [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), - "stvebx $rS, $dst", LdStGeneral, + "stvebx $rS, $dst", LdStStore, [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), - "stvehx $rS, $dst", LdStGeneral, + "stvehx $rS, $dst", LdStStore, [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), - "stvewx $rS, $dst", LdStGeneral, + "stvewx $rS, $dst", LdStStore, [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), - "stvx $rS, $dst", LdStGeneral, + "stvx $rS, $dst", LdStStore, [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), - "stvxl $rS, $dst", LdStGeneral, + "stvxl $rS, $dst", LdStStore, [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; } diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index d332e2a..d8e4b2b 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. 
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} // 1.7.1 I-Form class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, @@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. +class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : IForm_and_DForm_1<opcode1, aa, lk, opcode2, + OOL, IOL, asmstr, itin, pattern> { + let A = 0; + let Addr = 0; +} + class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I<opcode, OOL, IOL, asmstr, itin> { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 7a8ec40..b45ada9 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -49,9 +50,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( const TargetMachine *TM, const ScheduleDAG *DAG) const { unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_440) { + if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCHazardRecognizer440(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); @@ -65,14 +66,14 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // Most subtargets use a PPC970 recognizer. 
- if (Directive != PPC::DIR_440) { + if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) { const TargetInstrInfo *TII = TM.getInstrInfo(); assert(TII && "No InstrInfo?"); return new PPCHazardRecognizer970(*TII); } - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { @@ -684,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; + case PPC::BL8_NOP_ELF: + case PPC::BLA8_NOP_ELF: + return 8; default: return 4; // PowerPC instructions are all 4 bytes } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 939b71a..748486c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; @@ -542,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>, PPC970_DGroup_Single; +def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)), + (DCBT xoaddr:$dst)>; + // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { @@ -637,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in -def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; +def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; //===----------------------------------------------------------------------===// // PPC32 Load Instructions. @@ -646,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>; // Unindexed (r+i) Loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), - "lbz $rD, $src", LdStGeneral, + "lbz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), - "lhz $rD, $src", LdStGeneral, + "lhz $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), - "lwz $rD, $src", LdStGeneral, + "lwz $rD, $src", LdStLoad, [(set GPRC:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), @@ -670,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), // Unindexed (r+i) Loads with Update (preinc). 
let mayLoad = 1 in { def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lbzu $rD, $addr", LdStGeneral, + "lbzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhau $rD, $addr", LdStGeneral, + "lhau $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lhzu $rD, $addr", LdStGeneral, + "lhzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), - "lwzu $rD, $addr", LdStGeneral, + "lwzu $rD, $addr", LdStLoad, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; @@ -705,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), // let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), - "lbzx $rD, $src", LdStGeneral, + "lbzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), - "lhzx $rD, $src", LdStGeneral, + "lhzx $rD, $src", LdStLoad, [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), - "lwzx $rD, $src", LdStGeneral, + "lwzx $rD, $src", LdStLoad, [(set GPRC:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), - "lhbrx $rD, $src", LdStGeneral, + "lhbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), - "lwbrx $rD, $src", LdStGeneral, + "lwbrx $rD, $src", LdStLoad, [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), @@ -741,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), // Unindexed (r+i) Stores. 
let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), - "stb $rS, $src", LdStGeneral, + "stb $rS, $src", LdStStore, [(truncstorei8 GPRC:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), - "sth $rS, $src", LdStGeneral, + "sth $rS, $src", LdStStore, [(truncstorei16 GPRC:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), - "stw $rS, $src", LdStGeneral, + "stw $rS, $src", LdStStore, [(store GPRC:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStUX, @@ -761,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), let PPC970_Unit = 2 in { def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStGeneral, + "stbu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStGeneral, + "sthu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, + "stwu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfsu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStGeneral, + "stfdu $rS, $ptroff($ptrreg)", LdStStore, [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; @@ -798,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, // let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), - "stbx $rS, $dst", LdStGeneral, + "stbx $rS, $dst", LdStStore, [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), - "sthx $rS, $dst", LdStGeneral, + "sthx $rS, $dst", LdStStore, [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), - "stwx $rS, $dst", LdStGeneral, + "stwx $rS, $dst", LdStStore, [(store GPRC:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; let mayStore = 1 in { def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB), - "stwux $rS, $rA, $rB", LdStGeneral, + "stwux $rS, $rA, $rB", LdStStore, []>; } def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), - "sthbrx $rS, $dst", LdStGeneral, + "sthbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), - "stwbrx $rS, $dst", LdStGeneral, + "stwbrx $rS, $dst", LdStStore, [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, 
PPC970_DGroup_Cracked; diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 4590f00..a6528c0 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -291,9 +291,10 @@ void PPC64CompilationCallback() { } #endif -extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, - unsigned *OrigCallAddrPlus4, - bool is64Bit) { +extern "C" { +static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, + unsigned *OrigCallAddrPlus4, + bool is64Bit) { // Adjust the pointer to the address of the call instruction in the stub // emitted by emitFunctionStub, rather than the instruction after it. unsigned *StubCallAddr = StubCallAddrPlus4 - 1; @@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4, // stack after we restore all regs. return Target; } +} diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2976f01..ef13571 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -554,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // clear can be encoded. This is extremely uncommon, because normally you // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. - if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) { + if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm + (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index 4e37d0a..8c0a858 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass; def LdStDCBA : InstrItinClass; def LdStDCBF : InstrItinClass; def LdStDCBI : InstrItinClass; -def LdStGeneral : InstrItinClass; +def LdStLoad : InstrItinClass; +def LdStStore : InstrItinClass; def LdStDSS : InstrItinClass; def LdStICBI : InstrItinClass; def LdStUX : InstrItinClass; @@ -107,6 +108,7 @@ include "PPCSchedule440.td" include "PPCScheduleG4.td" include "PPCScheduleG4Plus.td" include "PPCScheduleG5.td" +include "PPCScheduleA2.td" //===----------------------------------------------------------------------===// // Instruction to itinerary class map - When add new opcodes to the supported @@ -150,8 +152,8 @@ include "PPCScheduleG5.td" // dcbf LdStDCBF // dcbi LdStDCBI // dcbst LdStDCBF -// dcbt LdStGeneral -// dcbtst LdStGeneral +// dcbt LdStLoad +// dcbtst LdStLoad // dcbz LdStDCBF // divd IntDivD // divdu IntDivD @@ -160,9 +162,9 @@ include "PPCScheduleG5.td" // dss LdStDSS // dst LdStDSS // dstst LdStDSS -// eciwx LdStGeneral -// ecowx LdStGeneral -// eieio LdStGeneral +// eciwx LdStLoad +// ecowx LdStLoad +// eieio LdStLoad // eqv IntGeneral // extsb IntGeneral // extsh IntGeneral @@ -202,10 +204,10 @@ include "PPCScheduleG5.td" // fsubs FPGeneral // icbi LdStICBI // isync SprISYNC -// lbz LdStGeneral -// lbzu LdStGeneral +// lbz LdStLoad +// lbzu LdStLoad // lbzux LdStUX -// lbzx LdStGeneral +// lbzx LdStLoad // ld LdStLD // ldarx LdStLDARX // ldu LdStLD @@ -223,11 +225,11 @@ include "PPCScheduleG5.td" // lhau LdStLHA // lhaux LdStLHA // lhax LdStLHA -// lhbrx LdStGeneral -// lhz LdStGeneral -// lhzu LdStGeneral +// lhbrx LdStLoad +// lhz LdStLoad +// lhzu LdStLoad // lhzux LdStUX -// 
lhzx LdStGeneral +// lhzx LdStLoad // lmw LdStLMW // lswi LdStLMW // lswx LdStLMW @@ -242,11 +244,11 @@ include "PPCScheduleG5.td" // lwarx LdStLWARX // lwaux LdStLHA // lwax LdStLHA -// lwbrx LdStGeneral -// lwz LdStGeneral -// lwzu LdStGeneral +// lwbrx LdStLoad +// lwz LdStLoad +// lwzu LdStLoad // lwzux LdStUX -// lwzx LdStGeneral +// lwzx LdStLoad // mcrf BrMCR // mcrfs FPGeneral // mcrxr BrMCRX @@ -307,10 +309,10 @@ include "PPCScheduleG5.td" // srawi IntShift // srd IntRotateD // srw IntGeneral -// stb LdStGeneral -// stbu LdStGeneral -// stbux LdStGeneral -// stbx LdStGeneral +// stb LdStStore +// stbu LdStStore +// stbux LdStStore +// stbx LdStStore // std LdStSTD // stdcx. LdStSTDCX // stdu LdStSTD @@ -325,11 +327,11 @@ include "PPCScheduleG5.td" // stfsu LdStUX // stfsux LdStUX // stfsx LdStUX -// sth LdStGeneral -// sthbrx LdStGeneral -// sthu LdStGeneral -// sthux LdStGeneral -// sthx LdStGeneral +// sth LdStStore +// sthbrx LdStStore +// sthu LdStStore +// sthux LdStStore +// sthx LdStStore // stmw LdStLMW // stswi LdStLMW // stswx LdStLMW @@ -338,12 +340,12 @@ include "PPCScheduleG5.td" // stvewx LdStSTVEBX // stvx LdStSTVEBX // stvxl LdStSTVEBX -// stw LdStGeneral -// stwbrx LdStGeneral +// stw LdStStore +// stwbrx LdStStore // stwcx. LdStSTWCX -// stwu LdStGeneral -// stwux LdStGeneral -// stwx LdStGeneral +// stwu LdStStore +// stwux LdStStore +// stwx LdStStore // subf IntGeneral // subfc IntGeneral // subfe IntGeneral diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td index 76f7465..419faea 100644 --- a/lib/Target/PowerPC/PPCSchedule440.td +++ b/lib/Target/PowerPC/PPCSchedule440.td @@ -270,15 +270,23 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, - InstrItinData<LdStGeneral , [InstrStage<1, [IFTH1, IFTH2]>, + InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1, DISS2]>, InstrStage<1, [LRACC]>, InstrStage<1, [AGEN]>, InstrStage<1, [CRD]>, InstrStage<2, [LWB]>], - [9, 5], // FIXME: should be [9, 5] for loads and - // [8, 5] for stores. 
+ [9, 5], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], [NoBypass, GPR_Bypass]>, InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, @@ -345,6 +353,46 @@ def PPC440Itineraries : ProcessorItineraries< InstrStage<1, [LWB]>], [8, 5], [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1, DISS2]>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<2, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>, + InstrStage<1, [PDCD1, PDCD2]>, + InstrStage<1, [DISS1]>, + InstrStage<1, [IRACC], 0>, + InstrStage<4, [LWARX_Hold], 0>, + InstrStage<1, [LRACC]>, + InstrStage<1, [AGEN]>, + InstrStage<1, [CRD]>, + InstrStage<1, [LWB]>], + [8, 5], + [NoBypass, GPR_Bypass]>, InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>, InstrStage<1, [PDCD1, PDCD2]>, InstrStage<1, [DISS1]>, diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td new file mode 100644 index 0000000..857ba40 --- /dev/null +++ b/lib/Target/PowerPC/PPCScheduleA2.td @@ -0,0 +1,652 @@ +//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// Primary reference: +// A2 Processor User's Manual. +// IBM (as updated in) 2010. 
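+//
+// Each InstrItinData record below gives, in order: an itinerary class, the
+// sequence of InstrStages the instruction occupies, the per-operand cycle
+// counts (for example [10, 7, 7]: the defined operand is available at
+// cycle 10, the two source operands are read at cycle 7), and the bypass
+// used for each of those operands.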
+ +//===----------------------------------------------------------------------===// +// Functional units on the PowerPC A2 chip sets +// +def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1 +def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2 +def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3 +def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4 +def IU4_0 : FuncUnit; // Instruction buffer slot 1 +def IU4_1 : FuncUnit; // Instruction buffer slot 2 +def IU4_2 : FuncUnit; // Instruction buffer slot 3 +def IU4_3 : FuncUnit; // Instruction buffer slot 4 +def IU4_4 : FuncUnit; // Instruction buffer slot 5 +def IU4_5 : FuncUnit; // Instruction buffer slot 6 +def IU4_6 : FuncUnit; // Instruction buffer slot 7 +def IU4_7 : FuncUnit; // Instruction buffer slot 8 +def IU5 : FuncUnit; // Dependency resolution +def IU6 : FuncUnit; // Instruction issue +def RF0 : FuncUnit; +def XRF1 : FuncUnit; +def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline +def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline +def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline +def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline +def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline +def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline +def FRF1 : FuncUnit; +def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline +def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline +def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline +def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline +def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline +def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline + +def CR_Bypass : Bypass; // The bypass for condition regs. +//def GPR_Bypass : Bypass; // The bypass for general-purpose regs. +//def FPR_Bypass : Bypass; // The bypass for floating-point regs. + +// +// This file defines the itinerary class data for the PPC A2 processor. 
+// +//===----------------------------------------------------------------------===// + + +def PPCA2Itineraries : ProcessorItineraries< + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3, + IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7, + IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6, + FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6], + [CR_Bypass, GPR_Bypass, FPR_Bypass], [ + InstrItinData<IntGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntCompare , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntDivW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>], + [53, 7, 7], + [NoBypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMFFS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMTFSB0 , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulHWU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + 
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntMulLI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntRotate , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntShift , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<IntTrapW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<BrB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<BrCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, CR_Bypass, CR_Bypass]>, + InstrItinData<BrMCRX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + 
InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7, 7], + [CR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStDCBA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBF , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStDCBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 11], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLoad , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStStore , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStICBI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStUX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<LdStLFD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, 
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLFDU , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7, 7], + [FPR_Bypass, GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStLHA , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLMW , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [14, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStLWARX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [13, 7], + [GPR_Bypass, GPR_Bypass]>, + InstrItinData<LdStSTDCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, 
[XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSTWCX , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<13, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [26, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<LdStSync , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>, + InstrItinData<SprISYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMTMSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprTLBSYNC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>, + InstrItinData<SprMFCR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [10, 7], + [GPR_Bypass, CR_Bypass]>, + InstrItinData<SprMFMSR , [InstrStage<4, + 
[IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [GPR_Bypass, NoBypass]>, + InstrItinData<SprMFSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMFTB , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSPR , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>], + [15, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprMTSRIN , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprRFI , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<SprSC , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>, + InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>, + InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>, + InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>], + [29, 7], + [NoBypass, GPR_Bypass]>, + InstrItinData<FPGeneral , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPCompare , [InstrStage<4, + [IU0to3_0, 
IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [13, 7, 7], + [CR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivD , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>, + InstrStage<71, [FEX1], 0>, + InstrStage<71, [FEX2], 0>, + InstrStage<71, [FEX3], 0>, + InstrStage<71, [FEX4], 0>, + InstrStage<71, [FEX5], 0>, + InstrStage<71, [FEX6]>], + [86, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPDivS , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>, + InstrStage<58, [FEX1], 0>, + InstrStage<58, [FEX2], 0>, + InstrStage<58, [FEX3], 0>, + InstrStage<58, [FEX4], 0>, + InstrStage<58, [FEX5], 0>, + InstrStage<58, [FEX6]>], + [73, 7, 7], + [NoBypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPSqrt , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>, + InstrStage<68, [FEX1], 0>, + InstrStage<68, [FEX2], 0>, + InstrStage<68, [FEX3], 0>, + InstrStage<68, [FEX4], 0>, + InstrStage<68, [FEX5], 0>, + InstrStage<68, [FEX6]>], + [86, 7], // FIXME: should be [86, 7] for double + // and [82, 7] for single. Likewise, + // the FEX? cycle count should be 68 + // for double and 64 for single. 
+ [NoBypass, FPR_Bypass]>, + InstrItinData<FPFused , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7, 7, 7], + [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>, + InstrItinData<FPRes , [InstrStage<4, + [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>, + InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3, + IU4_4, IU4_5, IU4_6, IU4_7]>, + InstrStage<1, [IU5]>, InstrStage<1, [IU6]>, + InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>, + InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>, + InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>, + InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>], + [15, 7], + [FPR_Bypass, FPR_Bypass]> +]>; diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index e7e5498..bc926f7 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries< InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 87a3151..f7ec1e0 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>, InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index f76557a..37ebfc5 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries< InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>, InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index bc0820b..d1e40ce 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries< InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>, InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>, InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>, - InstrItinData<LdStGeneral , 
[InstrStage<3, [SLU]>]>, + InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>, + InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>, InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>, InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>, InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>, diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c89fab3..f405b47 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -146,10 +146,11 @@ bool PPCSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const { - if (DarwinDirective == PPC::DIR_440) - return false; + if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2) + Mode = TargetSubtargetInfo::ANTIDEP_ALL; + else + Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; - Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); if (isPPC64()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 69fe50b..a275029 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -40,6 +40,7 @@ namespace PPC { DIR_7400, DIR_750, DIR_970, + DIR_A2, DIR_64 }; } @@ -144,6 +145,8 @@ public: /// isDarwin - True if this is any darwin platform. bool isDarwin() const { return TargetTriple.isMacOSX(); } + /// isBGP - True if this is a BG/P platform. + bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index ba9c779..d113976 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -39,6 +39,10 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, FrameLowering(Subtarget), JITInfo(*this, is64Bit), TLInfo(*this), TSInfo(*this), InstrItins(Subtarget.getInstrItineraryData()) { + + // The binutils for the BG/P are too old for CFI. + if (Subtarget.isBGP()) + setMCUseCFI(false); } void PPC32TargetMachine::anchor() { } diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 1f69ffb..093255e 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,22 +2,6 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// -With the recent changes to make the implicit def/use set explicit in -machineinstrs, we should change the target descriptions for 'call' instructions -so that the .td files don't list all the call-clobbered registers as implicit -defs. Instead, these should be added by the code generator (e.g. on the dag). - -This has a number of uses: - -1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions - for their different impdef sets. -2. Targets with multiple calling convs (e.g. x86) which have different clobber - sets don't need copies of call instructions. -3. 'Interprocedural register allocation' can be done to reduce the clobber sets - of calls. - -//===---------------------------------------------------------------------===// - We should recognized various "overflow detection" idioms and translate them into llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom: @@ -961,6 +945,25 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". 
//===---------------------------------------------------------------------===// +int g(int x) { return (x - 10) < 0; } +Should combine to "x <= 9" (the sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int g(int x) { return (x + 10) < 0; } +Should combine to "x < -10" (the add has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + +int f(int i, int j) { return i < j + 1; } +int g(int i, int j) { return j > i - 1; } +Should combine to "i <= j" (the add/sub has nsw). Currently not +optimized with "clang -emit-llvm-bc | opt -std-compile-opts". + +//===---------------------------------------------------------------------===// + This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 @@ -2358,3 +2361,8 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } should fold to x > y. //===---------------------------------------------------------------------===// + +int f(double x) { return __builtin_fabs(x) < 0.0; } +should fold to false. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index 616e1c5..f0e1354 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -14,10 +14,10 @@ #ifndef SPARCTARGETASMINFO_H #define SPARCTARGETASMINFO_H -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCAsmInfo.h" namespace llvm { + class StringRef; class Target; class SparcELFMCAsmInfo : public MCAsmInfo { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 2fd9e3f..cba775a 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -15,9 +15,7 @@ #define SPARCMCTARGETDESC_H namespace llvm { -class MCSubtargetInfo; class Target; -class StringRef; extern Target TheSparcTarget; extern Target TheSparcV9Target; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index ee12633..c3e6f16 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -832,22 +832,19 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { /// be zero. Op is expected to be a target specific node. Used by DAG /// combiner. void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { APInt KnownZero2, KnownOne2; - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. 
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; case SPISD::SELECT_ICC: case SPISD::SELECT_FCC: - DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, - Depth+1); - DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, - Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index f483c96..cf43048 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -50,7 +50,6 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 3acb4dd..acb7476 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -373,7 +373,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, // If the alignment is not a power of 2, round up to the next power of 2. // This happens for non-power-of-2 length vectors. if (Align & (Align-1)) - Align = llvm::NextPowerOf2(Align); + Align = NextPowerOf2(Align); return Align; } } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 269958f..ec95ad4 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -56,7 +56,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "exp2f", "expm1", "expm1l", - "expl1f", + "expm1f", "fabs", "fabsl", "fabsf", @@ -95,6 +95,9 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "rint", "rintf", "rintl", + "round", + "roundf", + "roundl", "sin", "sinl", "sinf", @@ -155,6 +158,81 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { TLI.setUnavailable(LibFunc::siprintf); TLI.setUnavailable(LibFunc::fiprintf); } + + if (T.getOS() == Triple::Win32) { + // Win32 does not support long double + TLI.setUnavailable(LibFunc::acosl); + TLI.setUnavailable(LibFunc::asinl); + TLI.setUnavailable(LibFunc::atanl); + TLI.setUnavailable(LibFunc::atan2l); + TLI.setUnavailable(LibFunc::ceill); + TLI.setUnavailable(LibFunc::copysignl); + TLI.setUnavailable(LibFunc::cosl); + TLI.setUnavailable(LibFunc::coshl); + TLI.setUnavailable(LibFunc::expl); + TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf + TLI.setUnavailable(LibFunc::fabsl); + TLI.setUnavailable(LibFunc::floorl); + TLI.setUnavailable(LibFunc::fmodl); + TLI.setUnavailable(LibFunc::logl); + TLI.setUnavailable(LibFunc::powl); + TLI.setUnavailable(LibFunc::sinl); + TLI.setUnavailable(LibFunc::sinhl); + TLI.setUnavailable(LibFunc::sqrtl); + TLI.setUnavailable(LibFunc::tanl); + TLI.setUnavailable(LibFunc::tanhl); + + // Win32 only has C89 math + TLI.setUnavailable(LibFunc::exp2); + TLI.setUnavailable(LibFunc::exp2f); + TLI.setUnavailable(LibFunc::exp2l); + TLI.setUnavailable(LibFunc::expm1); + TLI.setUnavailable(LibFunc::expm1f); + TLI.setUnavailable(LibFunc::expm1l); + TLI.setUnavailable(LibFunc::log2); + TLI.setUnavailable(LibFunc::log2f); + TLI.setUnavailable(LibFunc::log2l); + TLI.setUnavailable(LibFunc::log1p); + 
TLI.setUnavailable(LibFunc::log1pf); + TLI.setUnavailable(LibFunc::log1pl); + TLI.setUnavailable(LibFunc::nearbyint); + TLI.setUnavailable(LibFunc::nearbyintf); + TLI.setUnavailable(LibFunc::nearbyintl); + TLI.setUnavailable(LibFunc::rint); + TLI.setUnavailable(LibFunc::rintf); + TLI.setUnavailable(LibFunc::rintl); + TLI.setUnavailable(LibFunc::round); + TLI.setUnavailable(LibFunc::roundf); + TLI.setUnavailable(LibFunc::roundl); + TLI.setUnavailable(LibFunc::trunc); + TLI.setUnavailable(LibFunc::truncf); + TLI.setUnavailable(LibFunc::truncl); + + // Win32 provides some C99 math with mangled names + TLI.setAvailableWithName(LibFunc::copysign, "_copysign"); + + if (T.getArch() == Triple::x86) { + // Win32 on x86 implements single-precision math functions as macros + TLI.setUnavailable(LibFunc::acosf); + TLI.setUnavailable(LibFunc::asinf); + TLI.setUnavailable(LibFunc::atanf); + TLI.setUnavailable(LibFunc::atan2f); + TLI.setUnavailable(LibFunc::ceilf); + TLI.setUnavailable(LibFunc::copysignf); + TLI.setUnavailable(LibFunc::cosf); + TLI.setUnavailable(LibFunc::coshf); + TLI.setUnavailable(LibFunc::expf); + TLI.setUnavailable(LibFunc::floorf); + TLI.setUnavailable(LibFunc::fmodf); + TLI.setUnavailable(LibFunc::logf); + TLI.setUnavailable(LibFunc::powf); + TLI.setUnavailable(LibFunc::sinf); + TLI.setUnavailable(LibFunc::sinhf); + TLI.setUnavailable(LibFunc::sqrtf); + TLI.setUnavailable(LibFunc::tanf); + TLI.setUnavailable(LibFunc::tanhf); + } + } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 1589604..2570e0d 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index b4969ca..b9b2526 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -74,6 +75,27 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); + // FIXME: what should we do for protected and internal visibility? + // For variables, is internal different from hidden? + bool isHidden = GV->hasHiddenVisibility(); + + if (getRelocationModel() == Reloc::PIC_ && + !Options.PositionIndependentExecutable) { + if (isLocal || isHidden) + return TLSModel::LocalDynamic; + else + return TLSModel::GeneralDynamic; + } else { + if (!isDeclaration || isHidden) + return TLSModel::LocalExec; + else + return TLSModel::InitialExec; + } +} + /// getOptLevel - Returns the optimization level: None, Less, /// Default, or Aggressive. 
CodeGenOpt::Level TargetMachine::getOptLevel() const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp new file mode 100644 index 0000000..d6bba8b --- /dev/null +++ b/lib/Target/TargetMachineC.cpp @@ -0,0 +1,197 @@ +//===-- TargetMachine.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM-C part of TargetMachine.h +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" +#include "llvm-c/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include <cassert> +#include <cstdlib> +#include <cstring> + +using namespace llvm; + + + +LLVMTargetRef LLVMGetFirstTarget() { + const Target* target = &*TargetRegistry::begin(); + return wrap(target); +} +LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) { + return wrap(unwrap(T)->getNext()); +} + +const char * LLVMGetTargetName(LLVMTargetRef T) { + return unwrap(T)->getName(); +} + +const char * LLVMGetTargetDescription(LLVMTargetRef T) { + return unwrap(T)->getShortDescription(); +} + +LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) { + return unwrap(T)->hasJIT(); +} + +LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) { + return unwrap(T)->hasTargetMachine(); +} + +LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) { + return unwrap(T)->hasMCAsmBackend(); +} + +LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple, + char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc, + LLVMCodeModel CodeModel) { + Reloc::Model RM; + switch (Reloc){ + case LLVMRelocStatic: + RM = Reloc::Static; + break; + case LLVMRelocPIC: + RM = Reloc::PIC_; + break; + case LLVMRelocDynamicNoPic: + RM = Reloc::DynamicNoPIC; + break; + default: + RM = Reloc::Default; + break; + } + + CodeModel::Model CM; + switch (CodeModel) { + case LLVMCodeModelJITDefault: + CM = CodeModel::JITDefault; + break; + case LLVMCodeModelSmall: + CM = CodeModel::Small; + break; + case LLVMCodeModelKernel: + CM = CodeModel::Kernel; + break; + case LLVMCodeModelMedium: + CM = CodeModel::Medium; + break; + case LLVMCodeModelLarge: + CM = CodeModel::Large; + break; + default: + CM = CodeModel::Default; + break; + } + CodeGenOpt::Level OL; + + switch (Level) { + case LLVMCodeGenLevelNone: + OL = CodeGenOpt::None; + break; + case LLVMCodeGenLevelLess: + OL = CodeGenOpt::Less; + break; + case LLVMCodeGenLevelAggressive: + OL = CodeGenOpt::Aggressive; + break; + default: + OL = CodeGenOpt::Default; + break; + } + + TargetOptions opt; + return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM, + CM, OL)); +} + + +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { + delete unwrap(T); +} + +LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { + const Target* target = &(unwrap(T)->getTarget()); + return wrap(target); +} + +char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetTriple(); + return strdup(StringRep.c_str()); +} + +char* 
LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetCPU(); + return strdup(StringRep.c_str()); +} + +char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { + std::string StringRep = unwrap(T)->getTargetFeatureString(); + return strdup(StringRep.c_str()); +} + +LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { + return wrap(unwrap(T)->getTargetData()); +} + +LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, + char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) { + TargetMachine* TM = unwrap(T); + Module* Mod = unwrap(M); + + PassManager pass; + + std::string error; + + const TargetData* td = TM->getTargetData(); + + if (!td) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + pass.add(new TargetData(*td)); + + TargetMachine::CodeGenFileType ft; + switch (codegen) { + case LLVMAssemblyFile: + ft = TargetMachine::CGFT_AssemblyFile; + break; + default: + ft = TargetMachine::CGFT_ObjectFile; + break; + } + raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary); + formatted_raw_ostream destf(dest); + if (!error.empty()) { + *ErrorMessage = strdup(error.c_str()); + return true; + } + + if (TM->addPassesToEmitFile(pass, destf, ft)) { + error = "No TargetData in TargetMachine"; + *ErrorMessage = strdup(error.c_str()); + return true; + } + + pass.run(*Mod); + + destf.flush(); + dest.flush(); + return false; +} diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 9e88472..08c732c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" @@ -951,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { - bool IsVCMP = PatchedName.startswith("vcmp"); + bool IsVCMP = PatchedName[0] == 'v'; unsigned SSECCIdx = IsVCMP ? 
4 : 3; unsigned SSEComparisonCode = StringSwitch<unsigned>( PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) - .Case("eq_uq", 8) - .Case("nge", 9) + .Case("eq", 0x00) + .Case("lt", 0x01) + .Case("le", 0x02) + .Case("unord", 0x03) + .Case("neq", 0x04) + .Case("nlt", 0x05) + .Case("nle", 0x06) + .Case("ord", 0x07) + /* AVX only from here */ + .Case("eq_uq", 0x08) + .Case("nge", 0x09) .Case("ngt", 0x0A) .Case("false", 0x0B) .Case("neq_oq", 0x0C) @@ -988,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, .Case("gt_oq", 0x1E) .Case("true_us", 0x1F) .Default(~0U); - if (SSEComparisonCode != ~0U) { + if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8278bde..b13a006 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -322,7 +322,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, OperandType type = (OperandType)operand.type; + bool isBranch = false; + uint64_t pcrel = 0; if (type == TYPE_RELv) { + isBranch = true; + pcrel = insn.startLocation + + insn.displacementOffset + insn.displacementSize; switch (insn.displacementSize) { default: break; @@ -373,8 +378,6 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } - bool isBranch = false; - uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index fbd81d2..6020877 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1527,6 +1527,9 @@ static int readOperands(struct InternalInstruction* insn) { if (insn->spec->operands[index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; + if (insn->spec->operands[index].type == TYPE_IMM5 && + insn->immediates[insn->numImmediatesConsumed - 1] > 31) + return -1; if (insn->spec->operands[index].type == TYPE_XMM128 || insn->spec->operands[index].type == TYPE_XMM256) sawRegImm = 1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index d2e30f1..13e1136 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -273,6 +273,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ + ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index b7ccb4c..5118e4c 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -19,6 +19,8 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include 
"llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -26,7 +28,6 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. -#define GET_INSTRUCTION_NAME #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" @@ -49,10 +50,6 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } -StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} - void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index ff94301..2e00bff 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -22,12 +22,12 @@ class MCOperand; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen, returns true if we successfully printed an // alias. @@ -36,7 +36,6 @@ public: // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 46a96d2..4ea662c 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -17,15 +17,13 @@ #include "X86InstComments.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include <cctype> using namespace llvm; -// Include the auto-generated portion of the assembly writer. 
-#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { @@ -43,9 +41,6 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } -StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { - return getInstructionName(Opcode); -} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index ea1d38a..4f5938d 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -23,17 +23,16 @@ class MCOperand; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) - : MCInstPrinter(MAI, MRI) {} + X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI) {} virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); - virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3f770f7..32e40fe 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -9,7 +9,6 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" -#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 003a14a..afa545c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -155,4 +155,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; + + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index efd18c7..3482363 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -474,12 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCInstPrinter *createX86MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, + const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI, MRI); + return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI, MRI); + return new X86IntelInstPrinter(MAI, MII, MRI); return 0; } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index f9c1d35..6a8a4fd 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -2060,3 +2060,21 @@ Instead we could generate: The trick is 
to match "fetch_and_add(X, -C) == C". //===---------------------------------------------------------------------===// + +unsigned t(unsigned a, unsigned b) { + return a <= b ? 5 : -5; +} + +We generate: + movl $5, %ecx + cmpl %esi, %edi + movl $-5, %eax + cmovbel %ecx, %eax + +GCC: + cmpl %edi, %esi + sbbl %eax, %eax + andl $-10, %eax + addl $5, %eax + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 32c722a..a802333 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -169,6 +169,9 @@ void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { void DecodeVPERM2X128Mask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + if (Imm & 0x88) + return; // Not a shuffle + unsigned HalfSize = VT.getVectorNumElements()/2; unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index f1cedf3..7db7ccb 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -19,7 +19,6 @@ #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" #include "InstPrinter/X86ATTInstPrinter.h" -#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -265,8 +264,8 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op, raw_ostream &O) { unsigned char value = MI->getOperand(Op).getImm(); - assert(value <= 7 && "Invalid ssecc argument!"); switch (value) { + default: llvm_unreachable("Invalid ssecc argument!"); case 0: O << "eq"; break; case 1: O << "lt"; break; case 2: O << "le"; break; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3d63b7e..69752c5 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2179,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { return new X86FastISel(funcInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 936df27..ed1707d 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -28,7 +28,6 @@ #include "X86InstrInfo.h" #include "llvm/InlineAsm.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -219,7 +218,7 @@ namespace { /// getSTReg - Return the X86::ST(i) register which contains the specified /// FP<RegNo> register. unsigned getSTReg(unsigned RegNo) const { - return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; + return StackTop - 1 - getSlot(RegNo) + X86::ST0; } // pushReg - Push the specified FP<n> register onto the stack. 
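(Editorial note on the lib/Target/X86/README.txt entry above, not part of the patch: GCC's branch-free sequence works because "cmpl %edi, %esi" sets the carry flag exactly when b < a, and "sbbl %eax, %eax" turns that borrow into an all-zeros or all-ones mask. The same computation written out in C:

unsigned t(unsigned a, unsigned b) {
  // mask is 0 when a <= b, 0xFFFFFFFF otherwise -- this is what
  // "cmpl; sbbl %eax, %eax" materializes without a branch.
  unsigned mask = (a <= b) ? 0u : ~0u;
  // andl $-10 maps 0 -> 0 and -1 -> -10; addl $5 then yields 5 or -5.
  return (mask & -10u) + 5u;
}

The entry records this sbb-based expansion as a potentially better selection than the cmov sequence currently generated.)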
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9405c2f..8e2b1d6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -36,7 +36,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { // Handle X86-64 rip-relative addresses. We check this before checking direct // folding because RIP is preferable to non-RIP accesses. - if (Subtarget->is64Bit() && + if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP && // Under X86-64 non-small code model, GV (and friends) are 64-bits, so // they cannot be folded into immediate fields. // FIXME: This can be improved for kernel and other models? - (M == CodeModel::Small || M == CodeModel::Kernel) && - // Base and index reg must be 0 in order to use %rip as base and lowering - // must allow RIP. - !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { + (M == CodeModel::Small || M == CodeModel::Kernel)) { + // Base and index reg must be 0 in order to use %rip as base. + if (AM.hasBaseOrIndexReg()) + return true; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); @@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { } // Handle the case when globals fit in our immediate field: This is true for - // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit - // mode, this results in a non-RIP-relative computation. + // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit + // mode, this only applies to a non-RIP-relative computation. if (!Subtarget->is64Bit() || - ((M == CodeModel::Small || M == CodeModel::Kernel) && - TM.getRelocationModel() == Reloc::Static)) { + M == CodeModel::Small || M == CodeModel::Kernel) { + assert(N.getOpcode() != X86ISD::WrapperRIP && + "RIP-relative addressing already handled"); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { AM.GV = G->getGlobal(); AM.Disp += G->getOffset(); @@ -897,7 +897,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, MaskedHighBits, KnownZero, KnownOne); + DAG.ComputeMaskedBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift @@ -1848,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } +/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode +/// is suitable for doing the {load; increment or decrement; store} to modify +/// transformation. +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, + SDValue StoredVal, SelectionDAG *CurDAG, + LoadSDNode* &LoadNode, SDValue &InputChain) { + + // is the value stored the result of a DEC or INC? + if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; + + // is the stored value result 0 of the load? + if (StoredVal.getResNo() != 0) return false; + + // are there other uses of the loaded value than the inc or dec? + if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; + + // is the store non-extending and non-indexed? 
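// (Editorial note, not part of the patch: when every check in this helper
// passes, the caller collapses the three-instruction sequence
//     movq (%rdi), %rax
//     decq %rax
//     movq %rax, (%rdi)
// into the single read-modify-write form
//     decq (%rdi)
// which is only sound if there is exactly one load and one store, they use
// the same address, and nothing else observes the loaded value -- hence
// the checks that follow.)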
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) + return false; + + SDValue Load = StoredVal->getOperand(0); + // Is the stored value a non-extending and non-indexed load? + if (!ISD::isNormalLoad(Load.getNode())) return false; + + // Return LoadNode by reference. + LoadNode = cast<LoadSDNode>(Load); + // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + LdVT != MVT::i8) + return false; + + // Is store the only read of the loaded value? + if (!Load.hasOneUse()) + return false; + + // Is the address of the store the same as the load? + if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || + LoadNode->getOffset() != StoreNode->getOffset()) + return false; + + // Check if the chain is produced by the load or is a TokenFactor with + // the load output chain as an operand. Return InputChain by reference. + SDValue Chain = StoreNode->getChain(); + + bool ChainCheck = false; + if (Chain == Load.getValue(1)) { + ChainCheck = true; + InputChain = LoadNode->getChain(); + } else if (Chain.getOpcode() == ISD::TokenFactor) { + SmallVector<SDValue, 4> ChainOps; + for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { + SDValue Op = Chain.getOperand(i); + if (Op == Load.getValue(1)) { + ChainCheck = true; + continue; + } + ChainOps.push_back(Op); + } + + if (ChainCheck) + // Make a new TokenFactor with all the other input chains except + // for the load. + InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(), + MVT::Other, &ChainOps[0], ChainOps.size()); + } + if (!ChainCheck) + return false; + + return true; +} + +/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory +/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. +static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { + if (Opc == X86ISD::DEC) { + if (LdVT == MVT::i64) return X86::DEC64m; + if (LdVT == MVT::i32) return X86::DEC32m; + if (LdVT == MVT::i16) return X86::DEC16m; + if (LdVT == MVT::i8) return X86::DEC8m; + } else { + assert(Opc == X86ISD::INC && "unrecognized opcode"); + if (LdVT == MVT::i64) return X86::INC64m; + if (LdVT == MVT::i32) return X86::INC32m; + if (LdVT == MVT::i16) return X86::INC16m; + if (LdVT == MVT::i8) return X86::INC8m; + } + llvm_unreachable("unrecognized size for LdVT"); +} + SDNode *X86DAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); unsigned Opc, MOpc; @@ -2355,9 +2445,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; } case ISD::STORE: { + // Change a chain of {load; incr or dec; store} of the same value into + // a simple increment or decrement through memory of that value, if the + // uses of the modified value and its address are suitable. // The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. - // we'll need to improve tablegen to allow flags to be transferred from a + // the EFLAGS on the original DEC are used. (This also applies to + // {INC,DEC}X{64,32,16,8}.) + // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. 
probably with a new keyword // for example, we have this // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -2367,44 +2461,17 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", // [(store (add (loadi64 addr:$dst), -1), addr:$dst), // (transferrable EFLAGS)]>; + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); - SDValue Chain = StoreNode->getOperand(0); SDValue StoredVal = StoreNode->getOperand(1); - SDValue Address = StoreNode->getOperand(2); - SDValue Undef = StoreNode->getOperand(3); - - if (StoreNode->getMemOperand()->getSize() != 8 || - Undef->getOpcode() != ISD::UNDEF || - Chain->getOpcode() != ISD::LOAD || - StoredVal->getOpcode() != X86ISD::DEC || - StoredVal.getResNo() != 0 || - !StoredVal.getNode()->hasNUsesOfValue(1, 0) || - !Chain.getNode()->hasNUsesOfValue(1, 0) || - StoredVal->getOperand(0).getNode() != Chain.getNode()) - break; + unsigned Opc = StoredVal->getOpcode(); - //OPC_CheckPredicate, 1, // Predicate_nontemporalstore - if (StoreNode->isNonTemporal()) + LoadSDNode *LoadNode = 0; + SDValue InputChain; + if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, + LoadNode, InputChain)) break; - LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); - if (LoadNode->getOperand(1) != Address || - LoadNode->getOperand(2) != Undef) - break; - - if (!ISD::isNormalLoad(LoadNode)) - break; - - if (!ISD::isNormalStore(StoreNode)) - break; - - // check load chain has only one use (from the store) - if (!Chain.hasOneUse()) - break; - - // Merge the input chains if they are not intra-pattern references. - SDValue InputChain = LoadNode->getOperand(0); - SDValue Base, Scale, Index, Disp, Segment; if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -2414,7 +2481,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + EVT LdVT = LoadNode->getMemoryVT(); + unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); + MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), MVT::i32, MVT::Other, Ops, array_lengthof(Ops)); @@ -2465,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel) { return new X86DAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 88f3829..04299f3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1578,18 +1578,20 @@ X86TargetLowering::LowerReturn(SDValue Chain, MVT::Other, &RetOps[0], RetOps.size()); } -bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { +bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (N->getNumValues() != 1) return false; if (!N->hasNUsesOfValue(1, 0)) return false; + SDValue TCChain = Chain; SDNode *Copy = *N->use_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. 
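// (Editorial note, not part of the patch: the new Chain out-parameter
// reports the chain feeding the final CopyToReg, so a caller that proves a
// libcall's result is only returned can splice that chain and emit the
// call as a tail call. Hypothetical C-level effect:
//     void *zero(void *p, size_t n) { return memset(p, 0, n); }
// may now lower to "jmp memset" rather than "call memset; ret".)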
if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) return false; + TCChain = Copy->getOperand(0); } else if (Copy->getOpcode() != ISD::FP_EXTEND) return false; @@ -1601,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { HasRet = true; } - return HasRet; + if (!HasRet) + return false; + + Chain = TCChain; + return true; } EVT @@ -2929,6 +2935,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PSHUFHW: case X86ISD::PSHUFLW: case X86ISD::VPERMILP: + case X86ISD::VPERMI: return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8)); } } @@ -3970,6 +3977,27 @@ unsigned X86::getInsertVINSERTF128Immediate(SDNode *N) { return Index / NumElemsPerChunk; } +/// getShuffleCLImmediate - Return the appropriate immediate to shuffle +/// the specified VECTOR_SHUFFLE mask with VPERMQ and VPERMPD instructions. +/// Handles 256-bit. +static unsigned getShuffleCLImmediate(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumElts = VT.getVectorNumElements(); + + assert((VT.is256BitVector() && NumElts == 4) && + "Unsupported vector type for VPERMQ/VPERMPD"); + + unsigned Mask = 0; + for (unsigned i = 0; i != NumElts; ++i) { + int Elt = N->getMaskElt(i); + if (Elt < 0) + continue; + Mask |= Elt << (i*2); + } + + return Mask; +} /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { @@ -4402,6 +4430,7 @@ static bool getTargetShuffleMask(SDNode *N, EVT VT, case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); + if (Mask.empty()) return false; break; case X86ISD::MOVDDUP: case X86ISD::MOVLHPD: @@ -4852,41 +4881,42 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, return SDValue(); } -/// isVectorBroadcast - Check if the node chain is suitable to be xformed to -/// a vbroadcast node. We support two patterns: -/// 1. A splat BUILD_VECTOR which uses a single scalar load. +/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction +/// to generate a splat value for the following cases: +/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant. /// 2. A splat shuffle which uses a scalar_to_vector node which comes from -/// a scalar load. -/// The scalar load node is returned when a pattern is found, +/// a scalar load, or a constant. +/// The VBROADCAST node is returned when a pattern is found, /// or SDValue() otherwise. -static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { if (!Subtarget->hasAVX()) return SDValue(); EVT VT = Op.getValueType(); - SDValue V = Op; - - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); - //A suspected load to be broadcasted. SDValue Ld; + bool ConstSplatVal; - switch (V.getOpcode()) { + switch (Op.getOpcode()) { default: // Unknown pattern found. return SDValue(); case ISD::BUILD_VECTOR: { // The BUILD_VECTOR node must be a splat. - if (!isSplatVector(V.getNode())) + if (!isSplatVector(Op.getNode())) return SDValue(); - Ld = V.getOperand(0); + Ld = Op.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The suspected load node has several users. Make sure that all // of its users are from the BUILD_VECTOR node. 
- if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + // Constants may have multiple users. + if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) return SDValue(); break; } @@ -4904,15 +4934,50 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { return SDValue(); Ld = Sc.getOperand(0); + ConstSplatVal = (Ld.getOpcode() == ISD::Constant || + Ld.getOpcode() == ISD::ConstantFP); // The scalar_to_vector node and the suspected // load node must have exactly one user. - if (!Sc.hasOneUse() || !Ld.hasOneUse()) + // Constants may have multiple users. + if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse())) return SDValue(); break; } } + bool Is256 = VT.getSizeInBits() == 256; + bool Is128 = VT.getSizeInBits() == 128; + + // Handle the broadcasting a single constant scalar from the constant pool + // into a vector. On Sandybridge it is still better to load a constant vector + // from the constant pool and not to broadcast it from a scalar. + if (ConstSplatVal && Subtarget->hasAVX2()) { + EVT CVT = Ld.getValueType(); + assert(!CVT.isVector() && "Must not broadcast a vector type"); + unsigned ScalarSize = CVT.getSizeInBits(); + + if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) || + (Is128 && (ScalarSize == 32))) { + + const Constant *C = 0; + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) + C = CI->getConstantIntValue(); + else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) + C = CF->getConstantFPValue(); + + assert(C && "Invalid constant type"); + + SDValue CP = DAG.getConstantPool(C, getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); + Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment); + + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); + } + } + // The scalar source must be a normal load. if (!ISD::isNormalLoad(Ld.getNode())) return SDValue(); @@ -4921,28 +4986,26 @@ static SDValue isVectorBroadcast(SDValue &Op, const X86Subtarget *Subtarget) { if (Ld->hasAnyUseOfValue(1)) return SDValue(); - bool Is256 = VT.getSizeInBits() == 256; - bool Is128 = VT.getSizeInBits() == 128; unsigned ScalarSize = Ld.getValueType().getSizeInBits(); // VBroadcast to YMM if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 32)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // The integer check is needed for the 64-bit into 128-bit so it doesn't match // double since there is vbroadcastsd xmm if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) { // VBroadcast to YMM if (Is256 && (ScalarSize == 8 || ScalarSize == 16)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); // VBroadcast to XMM if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) - return Ld; + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } // Unsupported broadcast. 
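(Editorial aside, not part of the patch: the constant-splat path above trades a full-width vector constant for a scalar constant-pool entry plus a broadcast. A source-level example of a splat it applies to, assuming an AVX2 target:)

#include <immintrin.h>

// An 8 x float splat; under the new lowering the 4.0f lives in a 4-byte
// constant-pool slot and is broadcast, e.g.
//   vbroadcastss .LCPI0_0(%rip), %ymm0
// instead of loading a 32-byte vector constant with vmovaps.
__m256 splat4(void) { return _mm256_set1_ps(4.0f); }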
@@ -4977,9 +5040,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl); } - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5343,6 +5406,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return LowerAVXCONCAT_VECTORS(Op, DAG); } +// Try to lower a shuffle node into a simple blend instruction. +static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + DebugLoc dl = SVOp->getDebugLoc(); + EVT VT = Op.getValueType(); + EVT InVT = V1.getValueType(); + int MaskSize = VT.getVectorNumElements(); + int InSize = InVT.getVectorNumElements(); + + if (!Subtarget->hasSSE41()) + return SDValue(); + + if (MaskSize != InSize) + return SDValue(); + + int ISDNo = 0; + MVT OpTy; + + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v8i16: + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v8i16; + break; + case MVT::v4i32: + case MVT::v4f32: + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v4f32; + break; + case MVT::v2i64: + case MVT::v2f64: + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v2f64; + break; + case MVT::v8i32: + case MVT::v8f32: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPS; + OpTy = MVT::v8f32; + break; + case MVT::v4i64: + case MVT::v4f64: + if (!Subtarget->hasAVX()) + return SDValue(); + ISDNo = X86ISD::BLENDPD; + OpTy = MVT::v4f64; + break; + case MVT::v16i16: + if (!Subtarget->hasAVX2()) + return SDValue(); + ISDNo = X86ISD::BLENDPW; + OpTy = MVT::v16i16; + break; + } + assert(ISDNo && "Invalid Op Number"); + + unsigned MaskVals = 0; + + for (int i = 0; i < MaskSize; ++i) { + int EltIdx = SVOp->getMaskElt(i); + if (EltIdx == i || EltIdx == -1) + MaskVals |= (1<<i); + else if (EltIdx == (i + MaskSize)) + continue; // Bit is set to zero; + else return SDValue(); + } + + V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2); + SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2, + DAG.getConstant(MaskVals, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); +} + // v8i16 shuffles - Prefer shuffles in the following order: // 1. [all] pshuflw, pshufhw, optional move // 2. [ssse3] 1 x pshufb @@ -5836,96 +5978,79 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { unsigned NumElems = VT.getVectorNumElements(); unsigned NumLaneElems = NumElems / 2; - int MinRange[2][2] = { { static_cast<int>(NumElems), - static_cast<int>(NumElems) }, - { static_cast<int>(NumElems), - static_cast<int>(NumElems) } }; - int MaxRange[2][2] = { { -1, -1 }, { -1, -1 } }; + DebugLoc dl = SVOp->getDebugLoc(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems); + SDValue Shufs[2]; - // Collect used ranges for each source in each lane + SmallVector<int, 16> Mask; for (unsigned l = 0; l < 2; ++l) { - unsigned LaneStart = l*NumLaneElems; + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). 
+ // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + int InputUsed[2] = { -1, -1 }; // Not yet discovered. + unsigned LaneStart = l * NumLaneElems; for (unsigned i = 0; i != NumLaneElems; ++i) { + // The mask element. This indexes into the input. int Idx = SVOp->getMaskElt(i+LaneStart); - if (Idx < 0) + if (Idx < 0) { + // the mask element does not index into any input vector. + Mask.push_back(-1); continue; - - int Input = 0; - if (Idx >= (int)NumElems) { - Idx -= NumElems; - Input = 1; } - if (Idx > MaxRange[l][Input]) - MaxRange[l][Input] = Idx; - if (Idx < MinRange[l][Input]) - MinRange[l][Input] = Idx; - } - } + // The input vector this mask element indexes into. + int Input = Idx / NumLaneElems; - // Make sure each range is 128-bits - int ExtractIdx[2][2] = { { -1, -1 }, { -1, -1 } }; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (MinRange[l][Input] == (int)NumElems && MaxRange[l][Input] < 0) - continue; + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NumLaneElems; - if (MinRange[l][Input] >= 0 && MaxRange[l][Input] < (int)NumLaneElems) - ExtractIdx[l][Input] = 0; - else if (MinRange[l][Input] >= (int)NumLaneElems && - MaxRange[l][Input] < (int)NumElems) - ExtractIdx[l][Input] = NumLaneElems; - else - return SDValue(); - } - } + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) + // This input vector is already an operand. + break; + if (InputUsed[OpNo] < 0) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } - DebugLoc dl = SVOp->getDebugLoc(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NVT = MVT::getVectorVT(EltVT, NumElems/2); + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up. + return SDValue(); + } - SDValue Ops[2][2]; - for (unsigned l = 0; l < 2; ++l) { - for (unsigned Input = 0; Input < 2; ++Input) { - if (ExtractIdx[l][Input] >= 0) - Ops[l][Input] = Extract128BitVector(SVOp->getOperand(Input), - DAG.getConstant(ExtractIdx[l][Input], MVT::i32), - DAG, dl); - else - Ops[l][Input] = DAG.getUNDEF(NVT); + // Add the mask index for the new shuffle vector. + Mask.push_back(Idx + OpNo * NumLaneElems); } - } - // Generate 128-bit shuffles - SmallVector<int, 16> Mask1, Mask2; - for (unsigned i = 0; i != NumLaneElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; - } - Mask1.push_back(Elt); - } - for (unsigned i = NumLaneElems; i != NumElems; ++i) { - int Elt = SVOp->getMaskElt(i); - if (Elt >= (int)NumElems) { - Elt %= NumLaneElems; - Elt += NumLaneElems; - } else if (Elt >= 0) { - Elt %= NumLaneElems; + if (InputUsed[0] < 0) { + // No input vectors were used! The result is undefined. + Shufs[l] = DAG.getUNDEF(NVT); + } else { + SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2), + DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) : + Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2), + DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32), + DAG, dl); + // At least one input vector was used. Create a new shuffle vector. 
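// (Editorial note, not part of the patch: each 128-bit lane of the result
// is built independently here. For a v8f32 mask like <0,9,2,11,4,13,6,15>,
// lane 0 reads indices {0,9,2,11}, which live in half-vectors 0 and 2 of
// the four 128-bit input halves, so InputUsed becomes {0,2} and the lane
// lowers to one 128-bit shuffle of those two extracts. A lane touching
// more than two distinct halves takes the give-up path above.)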
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]); } - Mask2.push_back(Elt); - } - SDValue Shuf1 = DAG.getVectorShuffle(NVT, dl, Ops[0][0], Ops[0][1], &Mask1[0]); - SDValue Shuf2 = DAG.getVectorShuffle(NVT, dl, Ops[1][0], Ops[1][1], &Mask2[0]); + Mask.clear(); + } // Concatenate the result back - SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shuf1, + SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0], DAG.getConstant(0, MVT::i32), DAG, dl); - return Insert128BitVector(V, Shuf2, DAG.getConstant(NumElems/2, MVT::i32), + return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32), DAG, dl); } @@ -6203,10 +6328,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { getShuffleSHUFImmediate(SVOp), DAG); } -static -SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI, - const X86Subtarget *Subtarget) { +SDValue +X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -6222,9 +6345,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, int Size = VT.getSizeInBits(); // Use vbroadcast whenever the splat comes from a foldable load - SDValue LD = isVectorBroadcast(Op, Subtarget); - if (LD.getNode()) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + SDValue Broadcast = LowerVectorBroadcast(Op, DAG); + if (Broadcast.getNode()) + return Broadcast; // Handle splats by matching through known shuffle masks if ((Size == 128 && NumElem <= 4) || @@ -6309,7 +6432,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Normalize the input vectors. Here splats, zeroed vectors, profitable // narrowing and commutation of operands should be handled. The actual code // doesn't include all of those, work in progress... - SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget); + SDValue NewOp = NormalizeVectorShuffle(Op, DAG); if (NewOp.getNode()) return NewOp; @@ -6524,6 +6647,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); + SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG); + if (BlendOp.getNode()) + return BlendOp; + + if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) { + SmallVector<SDValue, 8> permclMask; + for (unsigned i = 0; i != 8; ++i) { + permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32)); + } + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, + &permclMask[0], 8); + // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 + return DAG.getNode(X86ISD::VPERMV, dl, VT, + DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1); + } + + if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64)) + return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, + getShuffleCLImmediate(SVOp), DAG); + + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. 
FIXME: this isn't true yet, but @@ -7182,8 +7326,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->resolveAliasedGlobal(false); - TLSModel::Model model - = getTLSModel(GV, getTargetMachine().getRelocationModel()); + TLSModel::Model model = getTargetMachine().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -8099,8 +8242,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, unsigned BitWidth = Op0.getValueSizeInBits(); unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { - APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; - DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + APInt Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Zeros, Ones); if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } @@ -9449,12 +9592,12 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); - case Intrinsic::x86_avx_vpermil_ps: - case Intrinsic::x86_avx_vpermil_pd: - case Intrinsic::x86_avx_vpermil_ps_256: - case Intrinsic::x86_avx_vpermil_pd_256: - return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_permd: + case Intrinsic::x86_avx2_permps: + // Operands intentionally swapped. Mask is last operand to intrinsic, + // but second operand for node/intruction. + return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(), + Op.getOperand(2), Op.getOperand(1)); // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest @@ -10963,6 +11106,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::BLENDPW: return "X86ISD::BLENDPW"; + case X86ISD::BLENDPS: return "X86ISD::BLENDPS"; + case X86ISD::BLENDPD: return "X86ISD::BLENDPD"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; @@ -11035,6 +11181,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; + case X86ISD::VPERMV: return "X86ISD::VPERMV"; + case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; @@ -11192,14 +11340,15 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB: // newMBB: // ld t1 = [bitinstr.addr] // op t2 = t1, [bitinstr.val] + // not t3 = t2 (if Invert) // mov EAX = t1 - // lcs dest = [bitinstr.addr], t2 [EAX is implicit] + // lcs dest = [bitinstr.addr], t3 [EAX is implicit] // bz newMBB // fallthrough -->nextMBB const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -11247,13 +11396,6 @@ 
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - unsigned tt = F->getRegInfo().createVirtualRegister(RC); - if (invSrc) { - MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1); - } - else - tt = t1; - unsigned t2 = F->getRegInfo().createVirtualRegister(RC); assert((argOpers[valArgIndx]->isReg() || argOpers[valArgIndx]->isImm()) && @@ -11262,16 +11404,23 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2); else MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2); - MIB.addReg(tt); + MIB.addReg(t1); (*MIB).addOperand(*argOpers[valArgIndx]); + unsigned t3 = F->getRegInfo().createVirtualRegister(RC); + if (Invert) { + MIB = BuildMI(newMBB, dl, TII->get(notOpc), t3).addReg(t2); + } + else + t3 = t2; + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc)); for (int i=0; i <= lastAddrIndx; ++i) (*MIB).addOperand(*argOpers[i]); - MIB.addReg(t2); + MIB.addReg(t3); assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand"); (*MIB).setMemRefs(bInstr->memoperands_begin(), bInstr->memoperands_end()); @@ -11294,7 +11443,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc) const { + bool Invert) const { // For the atomic bitwise operator, we generate // thisMBB (instructions are in pairs, except cmpxchg8b) // ld t1,t2 = [bitinstr.addr] @@ -11302,6 +11451,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4) // op t5, t6 <- out1, out2, [bitinstr.val] // (for SWAP, substitute: mov t5, t6 <- [bitinstr.val]) + // neg t7, t8 < t5, t6 (if Invert) // mov ECX, EBX <- t5, t6 // mov EAX, EDX <- t1, t2 // cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit] @@ -11385,16 +11535,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB); // The subsequent operations should be using the destination registers of - //the PHI instructions. - if (invSrc) { - t1 = F->getRegInfo().createVirtualRegister(RC); - t2 = F->getRegInfo().createVirtualRegister(RC); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg()); - MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg()); - } else { - t1 = dest1Oper.getReg(); - t2 = dest2Oper.getReg(); - } + // the PHI instructions. 
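// (Editorial note, not part of the patch: with the Invert rework, the
// optional NOT is applied to the result of the operation (t5/t6 feeding
// t7/t8 below) instead of to the loaded input, so atomic NAND computes
// ~(old & val), as the operation requires, rather than (~old) & val.)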
+ t1 = dest1Oper.getReg(); + t2 = dest2Oper.getReg(); int valArgIndx = lastAddrIndx + 1; assert((argOpers[valArgIndx]->isReg() || @@ -11421,15 +11564,26 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(t2); (*MIB).addOperand(*argOpers[valArgIndx + 1]); + unsigned t7, t8; + if (Invert) { + t7 = F->getRegInfo().createVirtualRegister(RC); + t8 = F->getRegInfo().createVirtualRegister(RC); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t7).addReg(t5); + MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t8).addReg(t6); + } else { + t7 = t5; + t8 = t6; + } + MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX); MIB.addReg(t1); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX); MIB.addReg(t2); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX); - MIB.addReg(t5); + MIB.addReg(t7); MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX); - MIB.addReg(t6); + MIB.addReg(t8); MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B)); for (int i=0; i <= lastAddrIndx; ++i) @@ -12620,11 +12774,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { + unsigned BitWidth = KnownZero.getBitWidth(); unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || Opc == ISD::INTRINSIC_WO_CHAIN || @@ -12633,7 +12787,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, "Should use MaskedValueIsZero if you don't know whether Op" " is a target node!"); - KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. switch (Opc) { default: break; case X86ISD::ADD: @@ -12652,8 +12806,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; // Fallthrough case X86ISD::SETCC: - KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - 1); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::INTRINSIC_WO_CHAIN: { unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); @@ -12678,8 +12831,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break; case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break; } - KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(), - Mask.getBitWidth() - NumLoBits); + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); break; } } @@ -14000,13 +14152,14 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. - if (Mask.getOpcode() != ISD::BITCAST || - X.getOpcode() != ISD::BITCAST || - Y.getOpcode() != ISD::BITCAST) - return SDValue(); - // Look through mask bitcast. - Mask = Mask.getOperand(0); + if (Mask.getOpcode() == ISD::BITCAST) + Mask = Mask.getOperand(0); + if (X.getOpcode() == ISD::BITCAST) + X = X.getOperand(0); + if (Y.getOpcode() == ISD::BITCAST) + Y = Y.getOperand(0); + EVT MaskVT = Mask.getValueType(); // Validate that the Mask operand is a vector sra node. @@ -14027,8 +14180,6 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Now we know we at least have a plendvb with the mask val. See if // we can form a psignb/w/d. 
// psign = x.type == y.type == mask.type && y = sub(0, x); - X = X.getOperand(0); - Y = Y.getOperand(0); if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && X.getValueType() == MaskVT && Y.getValueType() == MaskVT) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0327b1f..09116e8 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -175,9 +175,14 @@ namespace llvm { /// PSIGN - Copy integer sign. PSIGN, - /// BLEND family of opcodes + /// BLENDV - Blend where the selector is an XMM. BLENDV, + /// BLENDxx - Blend where the selector is an immediate. + BLENDPW, + BLENDPS, + BLENDPD, + /// HADD - Integer horizontal add. HADD, @@ -280,6 +285,8 @@ namespace llvm { UNPCKL, UNPCKH, VPERMILP, + VPERMV, + VPERMI, VPERM2X128, VBROADCAST, @@ -504,7 +511,6 @@ namespace llvm { /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, - const APInt &Mask, APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, @@ -781,6 +787,8 @@ namespace llvm { // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; + SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -804,7 +812,7 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; @@ -849,7 +857,7 @@ namespace llvm { unsigned notOpc, unsigned EAXreg, const TargetRegisterClass *RC, - bool invSrc = false) const; + bool Invert = false) const; MachineBasicBlock *EmitAtomicBit6432WithCustomInserter( MachineInstr *BInstr, @@ -858,7 +866,7 @@ namespace llvm { unsigned regOpcH, unsigned immOpcL, unsigned immOpcH, - bool invSrc = false) const; + bool Invert = false) const; /// Utility function to emit atomic min and max. It takes the min/max /// instruction to expand, the associated basic block, and the associated diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 7fa7499..0eee083 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -53,7 +53,7 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
[(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>; // AL,AH = AL*GR8 + (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), @@ -97,31 +97,32 @@ def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), let neverHasSideEffects = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; - // AL,AH = AL*GR8 +def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], + IIC_IMUL8>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in -def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize; // AX,DX = AX*GR16 +def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], + IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in -def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; - // EAX,EDX = EAX*GR32 +def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], + IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in -def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; - // RAX,RDX = RAX*GR64 +def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], + IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; + // AX,DX = AX*[mem16] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] } } // neverHasSideEffects @@ -639,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins, // BinOpRR - Instructions like "add reg, reg, reg". class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list<dag> pattern, Format f = MRMDestReg> + dag outlist, list<dag> pattern, InstrItinClass itin, + Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -650,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), [(set typeinfo.RegClass:$dst, - (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>; + (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))], + IIC_BIN_NONMEM>; // BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has // just a EFLAGS as a result. 
@@ -659,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
   : BinOpRR<opcode, mnemonic, typeinfo, (outs),
             [(set EFLAGS,
                   (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
-            f>;
+            IIC_BIN_NONMEM, f>;
 // BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
 // both a regclass and EFLAGS as a result.
@@ -667,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                  SDNode opnode>
   : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
             [(set typeinfo.RegClass:$dst, EFLAGS,
-                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+                  (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+            IIC_BIN_NONMEM>;
 // BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
 // both a regclass and EFLAGS as a result, and has EFLAGS as input.
@@ -676,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
   : BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
             [(set typeinfo.RegClass:$dst, EFLAGS,
                   (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
-                          EFLAGS))]>;
+                          EFLAGS))], IIC_BIN_NONMEM>;
 // BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
 class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
   : ITy<opcode, MRMSrcReg, typeinfo,
         (outs typeinfo.RegClass:$dst),
         (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
-        mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+        mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
   // The disassembler should know about this, but not the asmparser.
   let isCodeGenOnly = 1;
 }
@@ -692,7 +696,7 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
   : ITy<opcode, MRMSrcReg, typeinfo, (outs),
         (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
   // The disassembler should know about this, but not the asmparser.
   let isCodeGenOnly = 1;
 }
@@ -702,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               dag outlist, list<dag> pattern>
   : ITy<opcode, MRMSrcMem, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_MEM>;
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
 // BinOpRM_R - Instructions like "add reg, reg, [mem]".
 class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -738,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               Format f, dag outlist, list<dag> pattern>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
   let ImmT = typeinfo.ImmEncoding;
 }
@@ -762,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
   : BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
             [(set typeinfo.RegClass:$dst, EFLAGS,
                   (opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
 // BinOpRI_RFF - Instructions like "adc reg, reg, imm".
 class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
                   SDNode opnode, Format f>
@@ -776,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
               Format f, dag outlist, list<dag> pattern>
   : ITy<opcode, f, typeinfo, outlist,
         (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
-        mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+        mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
   let ImmT = Imm8; // Always 8-bit immediate.
 }
@@ -853,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
             [(store (opnode (typeinfo.VT (load addr:$dst)),
                             typeinfo.ImmOperator:$src), addr:$dst),
              (implicit EFLAGS)]>;
-
 // BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
 class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
                      SDNode opnode, Format f>
@@ -1219,12 +1221,12 @@ let neverHasSideEffects = 1 in {
   let isCommutable = 1 in
   def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             []>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V;
   let mayLoad = 1 in
   def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
              !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-             []>, T8XD, VEX_4V;
+             [], IIC_MUL8>, T8XD, VEX_4V;
   }
 }
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 42a5014..6f9e849 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -301,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
 // String Pseudo Instructions
 //
 let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)], IIC_REP_MOVS>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)], IIC_REP_MOVS>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                    Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                    Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                    Requires<[In32BitMode]>;
 }
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)], IIC_REP_MOVS>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                    Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                    Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                    Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+                      [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+                     Requires<[In64BitMode]>;
+}
 // FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)], IIC_REP_STOS>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)], IIC_REP_STOS>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)], IIC_REP_STOS>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,ECX,EDI] in
+  def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                      Requires<[In32BitMode]>;
+  let Uses = [AX,ECX,EDI] in
+  def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                      Requires<[In32BitMode]>;
+  let Uses = [EAX,ECX,EDI] in
+  def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                      Requires<[In32BitMode]>;
+}
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,RCX,RDI] in
+  def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                      Requires<[In64BitMode]>;
+  let Uses = [AX,RCX,RDI] in
+  def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                      Requires<[In64BitMode]>;
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                      Requires<[In64BitMode]>;
+
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+                        [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+                       Requires<[In64BitMode]>;
+}
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
@@ -1134,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
   APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
   return (~KnownZero0 & ~KnownZero1) == 0;
 }]>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index ba86098..bf11fde 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -21,20 +21,25 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
   def RET    : I <0xC3, RawFrm, (outs), (ins variable_ops),
                   "ret",
                   [(X86retflag 0)], IIC_RET>;
+  def RETW   : I <0xC3, RawFrm, (outs), (ins variable_ops),
+                  "ret{w}",
+                  [], IIC_RET>, OpSize;
   def RETI   : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
                     "ret\t$amt",
                     [(X86retflag timm:$amt)], IIC_RET_IMM>;
   def RETIW  : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
-                    "retw\t$amt",
+                    "ret{w}\t$amt",
                     [], IIC_RET_IMM>, OpSize;
   def LRETL  : I <0xCB, RawFrm, (outs), (ins),
-                  "lretl", [], IIC_RET>;
+                  "{l}ret{l|f}", [], IIC_RET>;
+  def LRETW  : I <0xCB, RawFrm, (outs), (ins),
+                  "{l}ret{w|f}", [], IIC_RET>, OpSize;
   def LRETQ  : RI <0xCB, RawFrm, (outs), (ins),
-                   "lretq", [], IIC_RET>;
+                   "{l}ret{q|f}", [], IIC_RET>;
   def LRETI  : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lret\t$amt", [], IIC_RET>;
+                    "{l}ret{l|f}\t$amt", [], IIC_RET>;
   def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
-                    "lretw\t$amt", [], IIC_RET>, OpSize;
+                    "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
 }
 // Unconditional branches.
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index ae3ed1b..35801e4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -126,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                 SDTCisSameAs<0,2>, SDTCisInt<3>]>;
 def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
 def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -153,11 +155,17 @@ def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
 def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
 def X86VPermilp  : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
+def X86VPermv    : SDNode<"X86ISD::VPERMV", SDTShuff2Op>;
+def X86VPermi    : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+
 //===----------------------------------------------------------------------===//
 // SSE Complex Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 307c96b..b12c1db 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1049,9 +1049,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::VPCMPGTWYrr,       X86::VPCMPGTWYrm,        TB_ALIGN_32 },
     { X86::VPERM2I128rr,      X86::VPERM2I128rm,       TB_ALIGN_32 },
     { X86::VPERMDYrr,         X86::VPERMDYrm,          TB_ALIGN_32 },
-    { X86::VPERMPDYrr,        X86::VPERMPDYrm,         TB_ALIGN_32 },
+    { X86::VPERMPDYri,        X86::VPERMPDYmi,         TB_ALIGN_32 },
     { X86::VPERMPSYrr,        X86::VPERMPSYrm,         TB_ALIGN_32 },
-    { X86::VPERMQYrr,         X86::VPERMQYrm,          TB_ALIGN_32 },
+    { X86::VPERMQYri,         X86::VPERMQYmi,          TB_ALIGN_32 },
     { X86::VPHADDDYrr,        X86::VPHADDDYrm,         TB_ALIGN_32 },
     { X86::VPHADDSWrr256,     X86::VPHADDSWrm256,      TB_ALIGN_32 },
     { X86::VPHADDWYrr,        X86::VPHADDWYrm,         TB_ALIGN_32 },
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index dd7cf50..6a25312 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -374,6 +374,11 @@ def SSECC : Operand<i8> {
   let OperandType = "OPERAND_IMMEDIATE";
 }
+def AVXCC : Operand<i8> {
+  let PrintMethod = "printSSECC";
+  let OperandType = "OPERAND_IMMEDIATE";
+}
+
 class ImmSExtAsmOperandClass : AsmOperandClass {
   let SuperClasses = [ImmAsmOperand];
   let RenderMethod = "addImmOperands";
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index df42627..65e3c1e 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2162,15 +2162,15 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
 // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
 multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
-                            SDNode OpNode, ValueType VT, PatFrag ld_frag,
-                            string asm, string asm_alt,
+                            Operand CC, SDNode OpNode, ValueType VT,
+                            PatFrag ld_frag, string asm, string asm_alt,
                             OpndItins itins> {
   def rr : SIi8<0xC2, MRMSrcReg,
-                (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                 itins.rr>;
   def rm : SIi8<0xC2, MRMSrcMem,
-                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                 [(set RC:$dst, (OpNode (VT RC:$src1),
                                        (ld_frag addr:$src2), imm:$cc))],
                 itins.rm>;
@@ -2187,57 +2187,57 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
   }
 }
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S>,
                 XS, VEX_4V, VEX_LIG;
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                  "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                  SSE_ALU_F32S>, // same latency as 32 bit compare
                 XD, VEX_4V, VEX_LIG;
 let Constraints = "$src1 = $dst" in {
-  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
                   "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                   "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
                   XS;
-  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
                   "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                   "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                   SSE_ALU_F32S>, // same latency as 32 bit compare
                   XD;
 }
-multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                          Intrinsic Int, string asm, OpndItins itins> {
   def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
-                      (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+                      (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                       [(set VR128:$dst, (Int VR128:$src1,
                                                VR128:$src, imm:$cc))],
                       itins.rr>;
   def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
-                      (ins VR128:$src1, x86memop:$src, SSECC:$cc), asm,
+                      (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                       [(set VR128:$dst, (Int VR128:$src1,
                                                (load addr:$src), imm:$cc))],
                       itins.rm>;
 }
 // Aliases to match intrinsics which expect XMM operand(s).
-defm Int_VCMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+defm Int_VCMPSS  : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                        SSE_ALU_F32S>,
                    XS, VEX_4V;
-defm Int_VCMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+defm Int_VCMPSD  : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                        SSE_ALU_F32S>, // same latency as f32
                    XD, VEX_4V;
 let Constraints = "$src1 = $dst" in {
-  defm Int_CMPSS  : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+  defm Int_CMPSS  : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
                        "cmp${cc}ss\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F32S>, XS;
-  defm Int_CMPSD  : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+  defm Int_CMPSD  : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
                        "cmp${cc}sd\t{$src, $dst|$dst, $src}",
                        SSE_ALU_F32S>, // same latency as f32
                        XD;
@@ -2308,50 +2308,50 @@ let Defs = [EFLAGS] in {
 // sse12_cmp_packed - sse 1 & 2 compare packed instructions
 multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
-                            Intrinsic Int, string asm, string asm_alt,
-                            Domain d> {
-  let isAsmParserOnly = 1 in {
-    def rri : PIi8<0xC2, MRMSrcReg,
-               (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
-               [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
-               IIC_SSE_CMPP_RR, d>;
-    def rmi : PIi8<0xC2, MRMSrcMem,
-               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
-               [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
-               IIC_SSE_CMPP_RM, d>;
-  }
+                            Operand CC, Intrinsic Int, string asm,
+                            string asm_alt, Domain d> {
+  def rri : PIi8<0xC2, MRMSrcReg,
+             (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+             IIC_SSE_CMPP_RR, d>;
+  def rmi : PIi8<0xC2, MRMSrcMem,
+             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+             [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+             IIC_SSE_CMPP_RM, d>;
   // Accept explicit immediate argument form instead of comparison code.
-  def rri_alt : PIi8<0xC2, MRMSrcReg,
-             (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
-             asm_alt, [], IIC_SSE_CMPP_RR, d>;
-  def rmi_alt : PIi8<0xC2, MRMSrcMem,
-             (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
-             asm_alt, [], IIC_SSE_CMPP_RM, d>;
+  let neverHasSideEffects = 1 in {
+    def rri_alt : PIi8<0xC2, MRMSrcReg,
+               (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_CMPP_RR, d>;
+    def rmi_alt : PIi8<0xC2, MRMSrcMem,
+               (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+               asm_alt, [], IIC_SSE_CMPP_RM, d>;
+  }
 }
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
                "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
                "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
                "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
                "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                SSEPackedDouble>, TB, OpSize, VEX_4V;
 let Constraints = "$src1 = $dst" in {
-  defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+  defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
                "cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
                "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                SSEPackedSingle>, TB;
-  defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+  defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
                "cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
                "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                SSEPackedDouble>, TB, OpSize;
@@ -6331,11 +6331,11 @@ def : Pat<(f64 (ftrunc FR64:$src)),
 let Defs = [EFLAGS], Predicates = [HasAVX] in {
 def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                 OpSize, VEX;
 def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                 "vptest\t{$src2, $src1|$src1, $src2}",
-                [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+                [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
                 OpSize, VEX;
 def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -6351,11 +6351,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
 let Defs = [EFLAGS] in {
 def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
               "ptest\t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
               OpSize;
 def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
               "ptest\t{$src2, $src1|$src1, $src2}",
-              [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
               OpSize;
 }
@@ -6735,12 +6735,32 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                             (v4f64 VR256:$src2))),
             (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+                               (imm:$mask))),
+            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+                               (imm:$mask))),
+            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+
+  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+                               (imm:$mask))),
+            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+                               (imm:$mask))),
+            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+                               (imm:$mask))),
+            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
 }
 let Predicates = [HasAVX2] in {
   def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                             (v32i8 VR256:$src2))),
             (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+                                (imm:$mask))),
+            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
 }
 /// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6789,6 +6809,17 @@ let Predicates = [HasSSE41] in {
   def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                             (v2f64 VR128:$src2))),
             (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+                               (imm:$mask))),
+            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+
 }
 let Predicates = [HasAVX] in
@@ -7294,6 +7325,46 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
                             []>, VEX_4V;
 }
+let Predicates = [HasAVX] in {
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rr VR256:$src1, VR128:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+                                   (i32 imm)),
+          (VINSERTF128rm VR256:$src1, addr:$src2,
+                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
 //===----------------------------------------------------------------------===//
 // VEXTRACTF128 - Extract packed floating-point values
 //
@@ -7664,45 +7735,47 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
 //
 multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                     Intrinsic Int> {
+                     ValueType OpVT> {
   def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src1, VR256:$src2),
                    !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+                   [(set VR256:$dst,
+                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, VEX_4V;
   def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins VR256:$src1, i256mem:$src2),
                    !strconcat(OpcodeStr,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                   [(set VR256:$dst, (Int VR256:$src1,
-                                      (bitconvert (mem_frag addr:$src2))))]>,
+                   [(set VR256:$dst,
+                     (OpVT (X86VPermv VR256:$src1,
+                            (bitconvert (mem_frag addr:$src2)))))]>,
                    VEX_4V;
 }
-defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>;
 let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>;
 multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
-                         Intrinsic Int> {
-  def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+                         ValueType OpVT> {
+  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                      (ins VR256:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                     [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
-  def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+                     [(set VR256:$dst,
+                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, VEX;
+  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                      (ins i256mem:$src1, i8imm:$src2),
                      !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-                     [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
-                     VEX;
+                     [(set VR256:$dst,
+                       (OpVT (X86VPermi (mem_frag addr:$src1),
+                              (i8 imm:$src2))))]>, VEX;
 }
-defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
-              VEX_W;
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W;
 let ExeDomain = SSEPackedDouble in
-defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
-               VEX_W;
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
 //===----------------------------------------------------------------------===//
 // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
@@ -7743,18 +7816,17 @@ def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
 //===----------------------------------------------------------------------===//
 // VINSERTI128 - Insert packed integer values
 //
+let neverHasSideEffects = 1 in {
 def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
                              (ins VR256:$src1, VR128:$src2, i8imm:$src3),
                              "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                             [(set VR256:$dst,
-                               (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>,
+                             []>,
                              VEX_4V;
 def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
                              (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
                              "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
-                             [(set VR256:$dst,
-                               (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
-                                imm:$src3))]>, VEX_4V;
+                             []>, VEX_4V;
+}
 let Predicates = [HasAVX2], AddedComplexity = 1 in {
 def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
@@ -7775,47 +7847,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                          (INSERT_get_vinsertf128_imm VR256:$ins))>;
 }
-// AVX1 patterns
-let Predicates = [HasAVX] in {
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rr VR256:$src1, VR128:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-
-def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
-                                   (i32 imm)),
-          (VINSERTF128rm VR256:$src1, addr:$src2,
-                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
-}
-
 //===----------------------------------------------------------------------===//
 // VEXTRACTI128 - Extract packed integer values
 //
@@ -7830,7 +7861,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
                               (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
                               "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                               []>, VEX;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
 def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
           (v2i64 (VEXTRACTI128rr
                     (v4i64 VR256:$src1),
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 3eb9441..ed1a409 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallVector.h"
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
@@ -425,7 +424,9 @@ bool X86Subtarget::enablePostRAScheduler(
            CodeGenOpt::Level OptLevel,
            TargetSubtargetInfo::AntiDepBreakMode& Mode,
            RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  //TODO: change back to ANTIDEP_CRITICAL when the
+  // X86 subtarget properly sets up post RA liveness.
+  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
   CriticalPathRCs.clear();
   return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
 }
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c0d2a9c..718f35e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -14,7 +14,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Dwarf.h"
 using namespace llvm;
 using namespace dwarf;
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 64f1a8e..1cfdbda 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 #include "XCoreMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
 using namespace llvm;
 void XCoreMCAsmInfo::anchor() { }
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 24e170a..0767775 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -14,10 +14,10 @@
 #ifndef XCORETARGETASMINFO_H
 #define XCORETARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCAsmInfo.h"
 namespace llvm {
+  class StringRef;
   class Target;
   class XCoreMCAsmInfo : public MCAsmInfo {
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 3cfc376..a255adb 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -15,9 +15,7 @@
 #define XCOREMCTARGETDESC_H
 namespace llvm {
-class MCSubtargetInfo;
 class Target;
-class StringRef;
 extern Target TheXCoreTarget;
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 593cebc..fdf2b78 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1363,8 +1363,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
-      if (KnownZero == Mask) {
+      DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+      if ((KnownZero & Mask) == Mask) {
         SDValue Carry = DAG.getConstant(0, VT);
         SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
         SDValue Ops [] = { Carry, Result };
@@ -1386,8 +1386,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
-      if (KnownZero == Mask) {
+      DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+      if ((KnownZero & Mask) == Mask) {
         SDValue Borrow = N2;
         SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
                                      DAG.getConstant(0, VT), N2);
@@ -1402,8 +1402,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       APInt KnownZero, KnownOne;
       APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                          VT.getSizeInBits() - 1);
-      DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
-      if (KnownZero == Mask) {
+      DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+      if ((KnownZero & Mask) == Mask) {
         SDValue Borrow = DAG.getConstant(0, VT);
         SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
         SDValue Ops [] = { Borrow, Result };
@@ -1521,21 +1521,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
 }
 void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
-                                                         const APInt &Mask,
                                                          APInt &KnownZero,
                                                          APInt &KnownOne,
                                                          const SelectionDAG &DAG,
                                                          unsigned Depth) const {
-  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
   switch (Op.getOpcode()) {
   default: break;
   case XCoreISD::LADD:
   case XCoreISD::LSUB:
     if (Op.getResNo() == 0) {
       // Top bits of carry / borrow are clear.
-      KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
-                                        Mask.getBitWidth() - 1);
-      KnownZero &= Mask;
+      KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+                                        KnownZero.getBitWidth() - 1);
     }
     break;
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 5cd3e67..0b63ecd 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -160,7 +160,6 @@ namespace llvm {
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
     virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-                                                const APInt &Mask,
                                                 APInt &KnownZero,
                                                 APInt &KnownOne,
                                                 const SelectionDAG &DAG,
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a32e550..1522aa4 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -341,6 +341,12 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
         dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
       if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
         SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+      // If the initializer is an all-null value and we have an inbounds GEP,
+      // we already know what the result of any load from that GEP is.
+      // TODO: Handle splats.
+      if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+        SubInit = Constant::getNullValue(GEP->getType()->getElementType());
     }
     Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 3c7fac6..664ddf6 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,7 +32,6 @@ namespace {
   // AlwaysInliner only inlines functions that are marked as "always inline".
   class AlwaysInliner : public Inliner {
-    InlineCostAnalyzer CA;
   public:
     // Use extremely low threshold.
     AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
@@ -43,40 +42,11 @@ namespace {
       initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
     }
     static char ID; // Pass identification, replacement for typeid
-    InlineCost getInlineCost(CallSite CS) {
-      Function *Callee = CS.getCalledFunction();
-      // We assume indirect calls aren't calling an always-inline function.
-      if (!Callee) return InlineCost::getNever();
-
-      // We can't inline calls to external functions.
-      // FIXME: We shouldn't even get here.
-      if (Callee->isDeclaration()) return InlineCost::getNever();
-
-      // Return never for anything not marked as always inline.
-      if (!Callee->hasFnAttr(Attribute::AlwaysInline))
-        return InlineCost::getNever();
-
-      // We still have to check the inline cost in case there are reasons to
-      // not inline which trump the always-inline attribute such as setjmp and
-      // indirectbr.
-      return CA.getInlineCost(CS);
-    }
-    float getInlineFudgeFactor(CallSite CS) {
-      return CA.getInlineFudgeFactor(CS);
-    }
-    void resetCachedCostInfo(Function *Caller) {
-      CA.resetCachedCostInfo(Caller);
-    }
-    void growCachedCostInfo(Function* Caller, Function* Callee) {
-      CA.growCachedCostInfo(Caller, Callee);
-    }
+    virtual InlineCost getInlineCost(CallSite CS);
     virtual bool doFinalization(CallGraph &CG) {
       return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
     }
     virtual bool doInitialization(CallGraph &CG);
-    void releaseMemory() {
-      CA.clear();
-    }
   };
 }
@@ -93,9 +63,70 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
   return new AlwaysInliner(InsertLifetime);
 }
+/// \brief Minimal filter to detect invalid constructs for inlining.
+static bool isInlineViable(Function &F) {
+  bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+    // Disallow inlining of functions which contain an indirect branch.
+    if (isa<IndirectBrInst>(BI->getTerminator()))
+      return false;
+
+    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+         ++II) {
+      CallSite CS(II);
+      if (!CS)
+        continue;
+
+      // Disallow recursive calls.
+      if (&F == CS.getCalledFunction())
+        return false;
+
+      // Disallow calls which expose returns-twice to a function not previously
+      // attributed as such.
+      if (!ReturnsTwice && CS.isCall() &&
+          cast<CallInst>(CS.getInstruction())->canReturnTwice())
+        return false;
+    }
+  }
+
+  return true;
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+  // We assume indirect calls aren't calling an always-inline function.
+  if (!Callee) return InlineCost::getNever();
+
+  // We can't inline calls to external functions.
+  // FIXME: We shouldn't even get here.
+  if (Callee->isDeclaration()) return InlineCost::getNever();
+
+  // Return never for anything not marked as always inline.
+  if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+    return InlineCost::getNever();
+
+  // Do some minimal analysis to preclude non-viable functions.
+  if (!isInlineViable(*Callee))
+    return InlineCost::getNever();
+
+  // Otherwise, force inlining.
+  return InlineCost::getAlways();
+}
+
 // doInitialization - Initializes the vector of functions that have not
 // been annotated with the "always inline" attribute.
 bool AlwaysInliner::doInitialization(CallGraph &CG) {
-  CA.setTargetData(getAnalysisIfAvailable<TargetData>());
   return false;
 }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 03032e6..50038d8 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -40,21 +40,9 @@ namespace {
     }
     static char ID; // Pass identification, replacement for typeid
     InlineCost getInlineCost(CallSite CS) {
-      return CA.getInlineCost(CS);
-    }
-    float getInlineFudgeFactor(CallSite CS) {
-      return CA.getInlineFudgeFactor(CS);
-    }
-    void resetCachedCostInfo(Function *Caller) {
-      CA.resetCachedCostInfo(Caller);
-    }
-    void growCachedCostInfo(Function* Caller, Function* Callee) {
-      CA.growCachedCostInfo(Caller, Callee);
+      return CA.getInlineCost(CS, getInlineThreshold(CS));
     }
     virtual bool doInitialization(CallGraph &CG);
-    void releaseMemory() {
-      CA.clear();
-    }
   };
 }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9975333..dc9cbfb 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,7 +19,6 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -37,6 +36,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
 STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
 STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+// This weirdly named statistic tracks the number of times that, when attempting
+// to inline a function A into B, we analyze the callers of B in order to see
+// if those would be more profitable and blocked inline steps.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
 static cl::opt<int>
 InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
         cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -232,14 +236,10 @@ bool Inliner::shouldInline(CallSite CS) {
     return false;
   }
-  int Cost = IC.getValue();
   Function *Caller = CS.getCaller();
-  int CurrentThreshold = getInlineThreshold(CS);
-  float FudgeFactor = getInlineFudgeFactor(CS);
-  int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
-  if (Cost >= AdjThreshold) {
-    DEBUG(dbgs() << "    NOT Inlining: cost=" << Cost
-          << ", thres=" << AdjThreshold
+  if (!IC) {
+    DEBUG(dbgs() << "    NOT Inlining: cost=" << IC.getCost()
+          << ", thres=" << (IC.getCostDelta() + IC.getCost())
           << ", Call: " << *CS.getInstruction() << "\n");
     return false;
   }
@@ -256,12 +256,17 @@ bool Inliner::shouldInline(CallSite CS) {
   // are used. Thus we will always have the opportunity to make local inlining
   // decisions. Importantly the linkonce-ODR linkage covers inline functions
   // and templates in C++.
+  //
+  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+  // the internal implementation of the inline cost metrics rather than
+  // treating them as truly abstract units etc.
   if (Caller->hasLocalLinkage() ||
       Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
     int TotalSecondaryCost = 0;
-    bool outerCallsFound = false;
+    // The candidate cost to be imposed upon the current function.
+    int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
     // This bool tracks what happens if we do NOT inline C into B.
-    bool callerWillBeRemoved = true;
+    bool callerWillBeRemoved = Caller->hasLocalLinkage();
     // This bool tracks what happens if we DO inline C into B.
     bool inliningPreventsSomeOuterInline = false;
     for (Value::use_iterator I = Caller->use_begin(), E = Caller->use_end();
@@ -277,26 +282,20 @@ bool Inliner::shouldInline(CallSite CS) {
       }
       InlineCost IC2 = getInlineCost(CS2);
-      if (IC2.isNever())
+      ++NumCallerCallersAnalyzed;
+      if (!IC2) {
         callerWillBeRemoved = false;
-      if (IC2.isAlways() || IC2.isNever())
+        continue;
+      }
+      if (IC2.isAlways())
         continue;
-      outerCallsFound = true;
-      int Cost2 = IC2.getValue();
-      int CurrentThreshold2 = getInlineThreshold(CS2);
-      float FudgeFactor2 = getInlineFudgeFactor(CS2);
-
-      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
-        callerWillBeRemoved = false;
-
-      // See if we have this case.  We subtract off the penalty
-      // for the call instruction, which we would be deleting.
-      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
-          Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
-            (int)(CurrentThreshold2 * FudgeFactor2)) {
+      // See if inlining or original callsite would erase the cost delta of
+      // this callsite. We subtract off the penalty for the call instruction,
+      // which we would be deleting.
+      if (IC2.getCostDelta() <= CandidateCost) {
         inliningPreventsSomeOuterInline = true;
-        TotalSecondaryCost += Cost2;
+        TotalSecondaryCost += IC2.getCost();
       }
     }
     // If all outer calls to Caller would get inlined, the cost for the last
@@ -306,17 +305,16 @@ bool Inliner::shouldInline(CallSite CS) {
     if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
       TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
-    if (outerCallsFound && inliningPreventsSomeOuterInline &&
-        TotalSecondaryCost < Cost) {
-      DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() <<
-           " Cost = " << Cost <<
+    if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+      DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction() <<
+           " Cost = " << IC.getCost() <<
            ", outer Cost = " << TotalSecondaryCost << '\n');
       return false;
     }
   }
-  DEBUG(dbgs() << "    Inlining: cost=" << Cost
-        << ", thres=" << AdjThreshold
+  DEBUG(dbgs() << "    Inlining: cost=" << IC.getCost()
+        << ", thres=" << (IC.getCostDelta() + IC.getCost())
        << ", Call: " << *CS.getInstruction() << '\n');
   return true;
 }
@@ -335,38 +333,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
-/// \brief Simplify arguments going into a particular callsite.
-///
-/// This is important to do each time we add a callsite due to inlining so that
-/// constants and other entities which feed into inline cost estimation are
-/// properly recognized when analyzing the new callsite. Consider:
-///   void outer(int x) {
-///     if (x < 42)
-///       return inner(42 - x);
-///     ...
-///   }
-///   void inner(int x) {
-///     ...
-///   }
-///
-/// The inliner gives calls to 'outer' with a constant argument a bonus because
-/// it will delete one side of a branch. But the resulting call to 'inner'
-/// will, after inlining, also have a constant operand. We need to do just
-/// enough constant folding to expose this for callsite arguments. The rest
-/// will be taken care of after the inliner finishes running.
-static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
-  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
-  // invalidate operand iterators in any cases.
-  SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I)
-    if (Instruction *Inst = dyn_cast<Instruction>(*I))
-      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
-        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
-  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
-    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
-}
-
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -455,8 +421,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         CG[Caller]->removeCallEdgeFor(CS);
         CS.getInstruction()->eraseFromParent();
         ++NumCallsDeleted;
-        // Update the cached cost info with the missing call
-        growCachedCostInfo(Caller, NULL);
       } else {
         // We can only inline direct calls to non-declarations.
        if (Callee == 0 || Callee->isDeclaration()) continue;
@@ -494,14 +458,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
           for (unsigned i = 0, e = InlineInfo.InlinedCalls.size(); i != e; ++i) {
             Value *Ptr = InlineInfo.InlinedCalls[i];
-            CallSite NewCS = Ptr;
-            simplifyCallSiteArguments(TD, NewCS);
-            CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
+            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
           }
         }
-
-        // Update the cached cost info with the inlined call.
-        growCachedCostInfo(Caller, Callee);
       }
       // If we inlined or deleted the last possible call site to the function,
@@ -521,8 +480,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
       // Remove any call graph edges from the callee to its callees.
       CalleeNode->removeAllCalledFunctions();
-      resetCachedCostInfo(Callee);
-
       // Removing the node for callee from the call graph and delete it.
       delete CG.removeFunctionFromModule(CalleeNode);
       ++NumDeleted;
@@ -601,14 +558,13 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
   // Note that it doesn't matter that we are iterating over a non-stable order
   // here to do this, it doesn't matter which order the functions are deleted
   // in.
-  std::sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
   FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
                                       FunctionsToRemove.end()),
                           FunctionsToRemove.end());
   for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
                                                   E = FunctionsToRemove.end();
        I != E; ++I) {
-    resetCachedCostInfo((*I)->getFunction());
     delete CG.removeFunctionFromModule(*I);
     ++NumDeleted;
   }
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 7cb1d18..fb5869e 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -122,6 +122,11 @@ bool InternalizePass::runOnModule(Module &M) {
   bool Changed = false;
+  // Never internalize functions which code-gen might insert.
+  // FIXME: We should probably add this (and the __stack_chk_guard) via some
+  // type of call-back in CodeGen.
+  ExternalNames.insert("__stack_chk_fail");
+
   // Mark all functions not in the api as internal.
   // FIXME: maybe use private linkage?
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -148,9 +153,11 @@ bool InternalizePass::runOnModule(Module &M) {
   // won't find them. (see MachineModuleInfo.)
ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); - ExternalNames.insert("llvm.noinline"); ExternalNames.insert("llvm.global.annotations"); + // Never internalize symbols code-gen inserts. + ExternalNames.insert("__stack_chk_guard"); + // Mark all global variables with initializers that are not in the api as // internal as well. // FIXME: maybe use private linkage? diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 8408437..43b4ab5 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -35,6 +35,11 @@ using namespace llvm; static cl::opt<bool> RunVectorization("vectorize", cl::desc("Run vectorization passes")); +static cl::opt<bool> +UseGVNAfterVectorization("use-gvn-after-vectorization", + cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -182,8 +187,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (Vectorize) { MPM.add(createBBVectorizePass()); MPM.add(createInstructionCombiningPass()); - if (OptLevel > 1) - MPM.add(createGVNPass()); // Remove redundancies + if (OptLevel > 1 && UseGVNAfterVectorization) + MPM.add(createGVNPass()); // Remove redundancies + else + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies } MPM.add(createAggressiveDCEPass()); // Delete dead instructions @@ -202,11 +209,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants } + addExtensionsToPM(EP_OptimizerLast, MPM); } void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, bool Internalize, - bool RunInliner) { + bool RunInliner, + bool DisableGVNLoadPRE) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); @@ -262,9 +271,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, PM.add(createFunctionAttrsPass()); // Add nocapture. PM.add(createGlobalsModRefPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. - PM.add(createGVNPass()); // Remove redundancies. - PM.add(createMemCpyOptPass()); // Remove dead memcpys. + PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. 
   PM.add(createDeadStoreEliminationPass());
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 464e9d0..199df51 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -291,9 +291,9 @@ public:
     return 0;  // Don't do anything with FI
   }
-  void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+  void ComputeMaskedBits(Value *V, APInt &KnownZero,
                          APInt &KnownOne, unsigned Depth = 0) const {
-    return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+    return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
   }
   bool MaskedValueIsZero(Value *V, const APInt &Mask,
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 908038b..05e702f 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -141,10 +141,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // a sub and fuse this add with it.
       if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
         IntegerType *IT = cast<IntegerType>(I.getType());
-        APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
         APInt LHSKnownOne(IT->getBitWidth(), 0);
         APInt LHSKnownZero(IT->getBitWidth(), 0);
-        ComputeMaskedBits(XorLHS, Mask, LHSKnownZero, LHSKnownOne);
+        ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
         if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
           return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
                                            XorLHS);
@@ -202,14 +201,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   // A+B --> A|B iff A and B have no bits set in common.
   if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
-    APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
     APInt LHSKnownOne(IT->getBitWidth(), 0);
     APInt LHSKnownZero(IT->getBitWidth(), 0);
-    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
     if (LHSKnownZero != 0) {
       APInt RHSKnownOne(IT->getBitWidth(), 0);
       APInt RHSKnownZero(IT->getBitWidth(), 0);
-      ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+      ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
       // No bits in common -> bitwise or.
       if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1165660..0dbe11d 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1362,13 +1362,8 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
   // part of the value (e.g. byte 3) then it must be shifted right.  If from the
   // low part, it must be shifted left.
   unsigned DestByteNo = InputByteNo + OverallLeftShift;
-  if (InputByteNo < ByteValues.size()/2) {
-    if (ByteValues.size()-1-DestByteNo != InputByteNo)
-      return true;
-  } else {
-    if (ByteValues.size()-1-DestByteNo != InputByteNo)
-      return true;
-  }
+  if (ByteValues.size()-1-DestByteNo != InputByteNo)
+    return true;
   // If the destination byte value is already defined, the values are or'd
   // together, which isn't a bswap (unless it's an or of the same bits).
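A pattern repeated throughout this commit, in the InstCombine hunks here and in the target hunks earlier: ComputeMaskedBits no longer takes a leading all-ones (or sign-bit) Mask argument, and the bit width instead travels in the caller-sized KnownZero/KnownOne APInts. (The InlineAlways.cpp and Inliner.cpp hunks above make the same kind of simplification for inlining, folding thresholds and fudge factors into the InlineCost value itself.) A minimal before/after sketch against the updated ValueTracking interface; the helper and the fixed 32-bit width are illustrative assumptions, not part of the patch:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/APInt.h"

using namespace llvm;

// Illustrative helper: true when the sign bit of a 32-bit value is
// provably clear.
//   Old call: ComputeMaskedBits(V, APInt::getAllOnesValue(32), KZ, KO);
//   New call (as in the hunks above): the Mask parameter is gone.
static bool signBitKnownZero32(Value *V) {
  APInt KnownZero(32, 0), KnownOne(32, 0);
  ComputeMaskedBits(V, KnownZero, KnownOne); // TargetData/Depth defaulted
  return KnownZero[31];
}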
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index b550fe8..77e4727 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -361,8 +361,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), - KnownZero, KnownOne); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) @@ -380,8 +379,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth), - KnownZero, KnownOne); + ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) @@ -394,17 +392,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType()); uint32_t BitWidth = IT->getBitWidth(); - APInt Mask = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; if (LHSKnownNegative || LHSKnownPositive) { APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; if (LHSKnownNegative && RHSKnownNegative) { @@ -488,14 +485,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::umul_with_overflow: { Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth(); - APInt Mask = APInt::getAllOnesValue(BitWidth); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); // Get the largest possible values for each operand. 
APInt LHSMax = ~LHSKnownZero; diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c5ddb75..39279f4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -541,8 +541,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C is some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne); + ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -590,9 +589,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -911,8 +909,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - APInt TypeMask(APInt::getAllOnesValue(BitWidth)); - ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + ComputeMaskedBits(Op0, KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5308992..ab2987f 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1028,9 +1028,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // of the high bits truncated out of x are known. unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); - APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits)); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne); + ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 7446a51..b2f2e24 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -22,6 +22,72 @@ using namespace llvm; STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +// Try to kill a dead alloca by walking through its uses until we see some use +// that could escape. This is a conservative analysis which tries to handle +// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things +// left after inlining and SROA finish chewing on an alloca.
+static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) { + SmallVector<Instruction *, 4> Worklist, DeadStores; + Worklist.push_back(&AI); + do { + Instruction *PI = Worklist.pop_back_val(); + for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end(); + UI != UE; ++UI) { + Instruction *I = cast<Instruction>(*UI); + switch (I->getOpcode()) { + default: + // Give up the moment we see something we can't handle. + return 0; + + case Instruction::GetElementPtr: + case Instruction::BitCast: + Worklist.push_back(I); + continue; + + case Instruction::Call: + // We can handle a limited subset of calls to no-op intrinsics. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + continue; + default: + return 0; + } + } + // Reject everything else. + return 0; + + case Instruction::Store: { + // Stores into the alloca are only live if the alloca is live. + StoreInst *SI = cast<StoreInst>(I); + // We can eliminate atomic stores, but not volatile. + if (SI->isVolatile()) + return 0; + // The store is only trivially safe if the pointer is the destination + // as opposed to the value. We're conservative here and don't check for + // the case where we store the address of a dead alloca into a dead + // alloca. + if (SI->getPointerOperand() != PI) + return 0; + DeadStores.push_back(I); + continue; + } + } + } + } while (!Worklist.empty()); + + // The alloca is dead. Kill off all the stores to it, and then replace it + // with undef. + while (!DeadStores.empty()) + IC.EraseInstFromFunction(*DeadStores.pop_back_val()); + return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType())); +} + Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that // any casting is exposed early. @@ -81,7 +147,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); } - return 0; + // Try to aggressively remove allocas which are only used for GEPs, lifetime + // markers, and stores. This happens when SROA iteratively promotes stores + // out of the alloca, and we need to clean up after it. + return removeDeadAlloca(*this, AI); } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 61a8e5b..125c74a 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -142,7 +142,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Instruction *I = dyn_cast<Instruction>(V); if (!I) { - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); return 0; // Only analyze instructions. } @@ -156,10 +156,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero, - LHSKnownZero, LHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this @@ -180,10 +178,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), DemandedMask, - RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne, - LHSKnownZero, LHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -206,7 +202,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); return 0; } @@ -219,7 +215,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(I, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. @@ -570,7 +566,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // Otherwise just hand the sub off to ComputeMaskedBits to fill in // the known zeros and ones. - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. @@ -729,10 +725,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // The sign bit is the LHS's sign bit, except when the result of the // remainder is zero. if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { - APInt Mask2 = APInt::getSignBit(BitWidth); APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, - Depth+1); + ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero |= LHSKnownZero; @@ -795,7 +789,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, return 0; } } - ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth); + ComputeMaskedBits(V, KnownZero, KnownOne, Depth); break; } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 349ba83..066b2ec 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -916,8 +916,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). 
Value *StrippedPtr = PtrOp->stripPointerCasts(); PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType()); - // We do not handle pointer-vector geps here - if (!StrippedPtr) + + // We do not handle pointer-vector geps here. + if (!StrippedPtrTy) return 0; if (StrippedPtr != PtrOp && diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 85fda30..8bb337e 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -22,16 +22,20 @@ #define DEBUG_TYPE "tsan" #include "FunctionBlackList.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Intrinsics.h" #include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -42,16 +46,36 @@ using namespace llvm; static cl::opt<std::string> ClBlackListFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); +static cl::opt<bool> ClPrintStats("tsan-print-stats", + cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden); + namespace { + +// Stats counters for ThreadSanitizer instrumentation. +struct ThreadSanitizerStats { + size_t NumInstrumentedReads; + size_t NumInstrumentedWrites; + size_t NumOmittedReadsBeforeWrite; + size_t NumAccessesWithBadSize; + size_t NumInstrumentedVtableWrites; + size_t NumOmittedReadsFromConstantGlobals; + size_t NumOmittedReadsFromVtable; +}; + /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { ThreadSanitizer(); bool runOnFunction(Function &F); bool doInitialization(Module &M); + bool doFinalization(Module &M); bool instrumentLoadOrStore(Instruction *I); static char ID; // Pass identification, replacement for typeid. private: + void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, + SmallVectorImpl<Instruction*> &All); + bool addrPointsToConstantData(Value *Addr); + TargetData *TD; OwningPtr<FunctionBlackList> BL; // Callbacks to run-time library are computed in doInitialization. @@ -61,6 +85,10 @@ struct ThreadSanitizer : public FunctionPass { static const size_t kNumberOfAccessSizes = 5; Value *TsanRead[kNumberOfAccessSizes]; Value *TsanWrite[kNumberOfAccessSizes]; + Value *TsanVptrUpdate; + + // Stats are modified w/o synchronization. + ThreadSanitizerStats stats; }; } // namespace @@ -83,6 +111,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { if (!TD) return false; BL.reset(new FunctionBlackList(ClBlackListFile)); + memset(&stats, 0, sizeof(stats)); // Always insert a call to __tsan_init into the module's CTORs. 
IRBuilder<> IRB(M.getContext()); @@ -105,14 +134,103 @@ bool ThreadSanitizer::doInitialization(Module &M) { TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL); } + TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + NULL); + return true; +} + +bool ThreadSanitizer::doFinalization(Module &M) { + if (ClPrintStats) { + errs() << "ThreadSanitizerStats " << M.getModuleIdentifier() + << ": wr " << stats.NumInstrumentedWrites + << "; rd " << stats.NumInstrumentedReads + << "; vt " << stats.NumInstrumentedVtableWrites + << "; bs " << stats.NumAccessesWithBadSize + << "; rbw " << stats.NumOmittedReadsBeforeWrite + << "; rcg " << stats.NumOmittedReadsFromConstantGlobals + << "; rvt " << stats.NumOmittedReadsFromVtable + << "\n"; + } return true; } +static bool isVtableAccess(Instruction *I) { + if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) { + if (Tag->getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + } + return false; +} + +bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) { + // If this is a GEP, just analyze its pointer operand. + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr)) + Addr = GEP->getPointerOperand(); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) { + if (GV->isConstant()) { + // Reads from constant globals can not race with any writes. + stats.NumOmittedReadsFromConstantGlobals++; + return true; + } + } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) { + if (isVtableAccess(L)) { + // Reads from a vtable pointer can not race with any writes. + stats.NumOmittedReadsFromVtable++; + return true; + } + } + return false; +} + +// Instrumenting some of the accesses may be proven redundant. +// Currently handled: +// - read-before-write (within same BB, no calls between) +// +// We do not handle some of the patterns that should not survive +// after the classic compiler optimizations. +// E.g. two reads from the same temp should be eliminated by CSE, +// two writes should be eliminated by DSE, etc. +// +// 'Local' is a vector of insns within the same BB (no calls between). +// 'All' is a vector of insns that will be instrumented. +void ThreadSanitizer::choseInstructionsToInstrument( + SmallVectorImpl<Instruction*> &Local, + SmallVectorImpl<Instruction*> &All) { + SmallSet<Value*, 8> WriteTargets; + // Iterate from the end. + for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(), + E = Local.rend(); It != E; ++It) { + Instruction *I = *It; + if (StoreInst *Store = dyn_cast<StoreInst>(I)) { + WriteTargets.insert(Store->getPointerOperand()); + } else { + LoadInst *Load = cast<LoadInst>(I); + Value *Addr = Load->getPointerOperand(); + if (WriteTargets.count(Addr)) { + // We will write to this temp, so no reason to analyze the read. + stats.NumOmittedReadsBeforeWrite++; + continue; + } + if (addrPointsToConstantData(Addr)) { + // Addr points to some constant data -- it can not race with any writes. 
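Before the function resumes below, the filtering idea is worth seeing in isolation: choseInstructionsToInstrument scans a block's accesses from the end, remembering written addresses, so a read whose address is written later in the same block (with no call in between) can be skipped. A standalone sketch with plain containers instead of LLVM types; Access is a hypothetical stand-in for a load or store:

#include <set>
#include <vector>

struct Access { void *Addr; bool IsWrite; };

std::vector<Access> chooseToInstrument(const std::vector<Access> &Local) {
  std::set<void *> WriteTargets;   // addresses written later in the block
  std::vector<Access> All;         // accesses that still need instrumentation
  for (std::vector<Access>::const_reverse_iterator It = Local.rbegin(),
       E = Local.rend(); It != E; ++It) {
    if (It->IsWrite)
      WriteTargets.insert(It->Addr);
    else if (WriteTargets.count(It->Addr))
      continue; // read-before-write: the racing write will be reported anyway
    All.push_back(*It);
  }
  return All;
}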
+ continue; + } + } + All.push_back(I); + } + Local.clear(); +} + bool ThreadSanitizer::runOnFunction(Function &F) { if (!TD) return false; if (BL->isIn(F)) return false; SmallVector<Instruction*, 8> RetVec; - SmallVector<Instruction*, 8> LoadsAndStores; + SmallVector<Instruction*, 8> AllLoadsAndStores; + SmallVector<Instruction*, 8> LocalLoadsAndStores; bool Res = false; bool HasCalls = false; @@ -123,12 +241,15 @@ bool ThreadSanitizer::runOnFunction(Function &F) { for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE; ++BI) { if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) - LoadsAndStores.push_back(BI); + LocalLoadsAndStores.push_back(BI); else if (isa<ReturnInst>(BI)) RetVec.push_back(BI); - else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) + else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { HasCalls = true; + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); + } } + choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } // We have collected all loads and stores. @@ -136,8 +257,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // (e.g. variables that do not escape, etc). // Instrument memory accesses. - for (size_t i = 0, n = LoadsAndStores.size(); i < n; ++i) { - Res |= instrumentLoadOrStore(LoadsAndStores[i]); + for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { + Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); } // Instrument function entry/exit points if there were instrumented accesses. @@ -151,6 +272,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { IRBuilder<> IRBRet(RetVec[i]); IRBRet.CreateCall(TsanFuncExit); } + Res = true; } return Res; } @@ -167,12 +289,23 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy); if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 && TypeSize != 128) { + stats.NumAccessesWithBadSize++; // Ignore all unusual sizes. return false; } + if (IsWrite && isVtableAccess(I)) { + Value *StoredValue = cast<StoreInst>(I)->getValueOperand(); + IRB.CreateCall2(TsanVptrUpdate, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy())); + stats.NumInstrumentedVtableWrites++; + return true; + } size_t Idx = CountTrailingZeros_32(TypeSize / 8); assert(Idx < kNumberOfAccessSizes); Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + if (IsWrite) stats.NumInstrumentedWrites++; + else stats.NumInstrumentedReads++; return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 020ec57..9a5423f 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -567,8 +567,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, - TLInfo, ModifiedDT ? 0 : DT); + replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0, + TLInfo, ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index ac80c48..fb733ad 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1974,109 +1974,119 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, /// dominated by 'Root'. 
Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { - if (LHS == RHS) return false; - assert(LHS->getType() == RHS->getType() && "Equal but types differ!"); + SmallVector<std::pair<Value*, Value*>, 4> Worklist; + Worklist.push_back(std::make_pair(LHS, RHS)); + bool Changed = false; - // Don't try to propagate equalities between constants. - if (isa<Constant>(LHS) && isa<Constant>(RHS)) - return false; + while (!Worklist.empty()) { + std::pair<Value*, Value*> Item = Worklist.pop_back_val(); + LHS = Item.first; RHS = Item.second; + + if (LHS == RHS) continue; + assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); + + // Don't try to propagate equalities between constants. + if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue; - // Prefer a constant on the right-hand side, or an Argument if no constants. - if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) - std::swap(LHS, RHS); - assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); - - // If there is no obvious reason to prefer the left-hand side over the right- - // hand side, ensure the longest lived term is on the right-hand side, so the - // shortest lived term will be replaced by the longest lived. This tends to - // expose more simplifications. - uint32_t LVN = VN.lookup_or_add(LHS); - if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || - (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { - // Move the 'oldest' value to the right-hand side, using the value number as - // a proxy for age. - uint32_t RVN = VN.lookup_or_add(RHS); - if (LVN < RVN) { + // Prefer a constant on the right-hand side, or an Argument if no constants. + if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS))) std::swap(LHS, RHS); - LVN = RVN; + assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!"); + + // If there is no obvious reason to prefer the left-hand side over the right- + // hand side, ensure the longest lived term is on the right-hand side, so the + // shortest lived term will be replaced by the longest lived. This tends to + // expose more simplifications. + uint32_t LVN = VN.lookup_or_add(LHS); + if ((isa<Argument>(LHS) && isa<Argument>(RHS)) || + (isa<Instruction>(LHS) && isa<Instruction>(RHS))) { + // Move the 'oldest' value to the right-hand side, using the value number as + // a proxy for age. + uint32_t RVN = VN.lookup_or_add(RHS); + if (LVN < RVN) { + std::swap(LHS, RHS); + LVN = RVN; + } + } + assert((!isa<Instruction>(RHS) || + DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) && + "Instruction doesn't dominate scope!"); + + // If value numbering later deduces that an instruction in the scope is equal + // to 'LHS' then ensure it will be turned into 'RHS'. + addToLeaderTable(LVN, RHS, Root); + + // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As + // LHS always has at least one use that is not dominated by Root, this will + // never do anything if LHS has only one use. + if (!LHS->hasOneUse()) { + unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); + Changed |= NumReplacements > 0; + NumGVNEqProp += NumReplacements; } - } - - // If value numbering later deduces that an instruction in the scope is equal - // to 'LHS' then ensure it will be turned into 'RHS'. 
- addToLeaderTable(LVN, RHS, Root); - // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As - // LHS always has at least one use that is not dominated by Root, this will - // never do anything if LHS has only one use. - bool Changed = false; - if (!LHS->hasOneUse()) { - unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root); - Changed |= NumReplacements > 0; - NumGVNEqProp += NumReplacements; - } - - // Now try to deduce additional equalities from this one. For example, if the - // known equality was "(A != B)" == "false" then it follows that A and B are - // equal in the scope. Only boolean equalities with an explicit true or false - // RHS are currently supported. - if (!RHS->getType()->isIntegerTy(1)) - // Not a boolean equality - bail out. - return Changed; - ConstantInt *CI = dyn_cast<ConstantInt>(RHS); - if (!CI) - // RHS neither 'true' nor 'false' - bail out. - return Changed; - // Whether RHS equals 'true'. Otherwise it equals 'false'. - bool isKnownTrue = CI->isAllOnesValue(); - bool isKnownFalse = !isKnownTrue; - - // If "A && B" is known true then both A and B are known true. If "A || B" - // is known false then both A and B are known false. - Value *A, *B; - if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || - (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { - Changed |= propagateEquality(A, RHS, Root); - Changed |= propagateEquality(B, RHS, Root); - return Changed; - } + // Now try to deduce additional equalities from this one. For example, if the + // known equality was "(A != B)" == "false" then it follows that A and B are + // equal in the scope. Only boolean equalities with an explicit true or false + // RHS are currently supported. + if (!RHS->getType()->isIntegerTy(1)) + // Not a boolean equality - bail out. + continue; + ConstantInt *CI = dyn_cast<ConstantInt>(RHS); + if (!CI) + // RHS neither 'true' nor 'false' - bail out. + continue; + // Whether RHS equals 'true'. Otherwise it equals 'false'. + bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownFalse = !isKnownTrue; + + // If "A && B" is known true then both A and B are known true. If "A || B" + // is known false then both A and B are known false. + Value *A, *B; + if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) || + (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) { + Worklist.push_back(std::make_pair(A, RHS)); + Worklist.push_back(std::make_pair(B, RHS)); + continue; + } - // If we are propagating an equality like "(A == B)" == "true" then also - // propagate the equality A == B. When propagating a comparison such as - // "(A >= B)" == "true", replace all instances of "A < B" with "false". - if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { - Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); - - // If "A == B" is known true, or "A != B" is known false, then replace - // A with B everywhere in the scope. - if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || - (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) - Changed |= propagateEquality(Op0, Op1, Root); - - // If "A >= B" is known true, replace "A < B" with false everywhere. - CmpInst::Predicate NotPred = Cmp->getInversePredicate(); - Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); - // Since we don't have the instruction "A < B" immediately to hand, work out - // the value number that it would have and use that to find an appropriate - // instruction (if any). 
- uint32_t NextNum = VN.getNextUnusedValueNumber(); - uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); - // If the number we were assigned was brand new then there is no point in - // looking for an instruction realizing it: there cannot be one! - if (Num < NextNum) { - Value *NotCmp = findLeader(Root, Num); - if (NotCmp && isa<Instruction>(NotCmp)) { - unsigned NumReplacements = - replaceAllDominatedUsesWith(NotCmp, NotVal, Root); - Changed |= NumReplacements > 0; - NumGVNEqProp += NumReplacements; + // If we are propagating an equality like "(A == B)" == "true" then also + // propagate the equality A == B. When propagating a comparison such as + // "(A >= B)" == "true", replace all instances of "A < B" with "false". + if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) { + Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1); + + // If "A == B" is known true, or "A != B" is known false, then replace + // A with B everywhere in the scope. + if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) || + (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) + Worklist.push_back(std::make_pair(Op0, Op1)); + + // If "A >= B" is known true, replace "A < B" with false everywhere. + CmpInst::Predicate NotPred = Cmp->getInversePredicate(); + Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse); + // Since we don't have the instruction "A < B" immediately to hand, work out + // the value number that it would have and use that to find an appropriate + // instruction (if any). + uint32_t NextNum = VN.getNextUnusedValueNumber(); + uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1); + // If the number we were assigned was brand new then there is no point in + // looking for an instruction realizing it: there cannot be one! + if (Num < NextNum) { + Value *NotCmp = findLeader(Root, Num); + if (NotCmp && isa<Instruction>(NotCmp)) { + unsigned NumReplacements = + replaceAllDominatedUsesWith(NotCmp, NotVal, Root); + Changed |= NumReplacements > 0; + NumGVNEqProp += NumReplacements; + } } - } - // Ensure that any instruction in scope that gets the "A < B" value number - // is replaced with false. - addToLeaderTable(Num, NotVal, Root); + // Ensure that any instruction in scope that gets the "A < B" value number + // is replaced with false. + addToLeaderTable(Num, NotVal, Root); - return Changed; + continue; + } } return Changed; @@ -2325,7 +2335,14 @@ bool GVN::performPRE(Function &F) { CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() || isa<DbgInfoIntrinsic>(CurInst)) continue; - + + // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from + // sinking the compare again, and it would force the code generator to + // move the i1 from processor flags or predicate registers into a general + // purpose register. + if (isa<CmpInst>(CurInst)) + continue; + // We don't currently value number ANY inline asm calls. 
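The propagateEquality rewrite above is a mechanical recursion-to-worklist conversion: each recursive call becomes a push, each early return becomes a continue, and Changed accumulates across iterations, so deep chains of derived equalities no longer grow the call stack. The control-flow skeleton, reduced to a sketch with a hypothetical Value type:

#include <utility>
#include <vector>

struct Value;

bool propagateEquality(Value *LHS, Value *RHS) {
  std::vector<std::pair<Value*, Value*> > Worklist;
  Worklist.push_back(std::make_pair(LHS, RHS));
  bool Changed = false;
  while (!Worklist.empty()) {
    std::pair<Value*, Value*> Item = Worklist.back();
    Worklist.pop_back();
    (void)Item; // process Item.first/Item.second exactly as the old body did;
    // where the old code recursed, push the derived pair instead:
    //   Worklist.push_back(std::make_pair(A, Item.second));
    // and where it returned early, fall through to the next item.
  }
  return Changed;
}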
if (CallInst *CallI = dyn_cast<CallInst>(CurInst)) if (CallI->isInlineAsm()) diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 490617a..a9ba657 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -33,7 +33,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Type.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -50,18 +49,12 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumRemoved , "Number of aux indvars removed"); STATISTIC(NumWidened , "Number of indvars widened"); -STATISTIC(NumInserted , "Number of canonical indvars added"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); STATISTIC(NumElimIV , "Number of congruent IVs eliminated"); -static cl::opt<bool> EnableIVRewrite( - "enable-iv-rewrite", cl::Hidden, - cl::desc("Enable canonical induction variable rewriting")); - // Trip count verification can be enabled by default under NDEBUG if we // implement a strong expression equivalence checker in SCEV. Until then, we // use the verify-indvars flag, which may assert in some cases. @@ -71,7 +64,6 @@ static cl::opt<bool> VerifyIndvars( namespace { class IndVarSimplify : public LoopPass { - IVUsers *IU; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; @@ -82,7 +74,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0), + IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -95,13 +87,9 @@ namespace { AU.addRequired<ScalarEvolution>(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - if (EnableIVRewrite) - AU.addRequired<IVUsers>(); AU.addPreserved<ScalarEvolution>(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - if (EnableIVRewrite) - AU.addPreserved<IVUsers>(); AU.setPreservesCFG(); } @@ -119,8 +107,6 @@ namespace { void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); - Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); @@ -136,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(IVUsers) INITIALIZE_PASS_END(IndVarSimplify, "indvars", "Induction Variable Simplification", false, false) @@ -448,13 +433,6 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN); } - - // Add a new IVUsers entry for the newly-created integer PHI. - if (IU) { - SmallPtrSet<Loop*, 16> SimplifiedLoopNests; - IU->AddUsersIfInteresting(NewPHI, SimplifiedLoopNests); - } - Changed = true; } @@ -600,124 +578,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { } //===----------------------------------------------------------------------===// -// Rewrite IV users based on a canonical IV. -// Only for use with -enable-iv-rewrite. 
-//===----------------------------------------------------------------------===// - -/// FIXME: It is an extremely bad idea to indvar substitute anything more -/// complex than affine induction variables. Doing so will put expensive -/// polynomial evaluations inside of the loop, and the str reduction pass -/// currently can only reduce affine polynomials. For now just disable -/// indvar subst on anything more complex than an affine addrec, unless -/// it can be expanded to a trivial value. -static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { - // Loop-invariant values are safe. - if (SE->isLoopInvariant(S, L)) return true; - - // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how - // to transform them into efficient code. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - return AR->isAffine(); - - // An add is safe it all its operands are safe. - if (const SCEVCommutativeExpr *Commutative - = dyn_cast<SCEVCommutativeExpr>(S)) { - for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), - E = Commutative->op_end(); I != E; ++I) - if (!isSafe(*I, L, SE)) return false; - return true; - } - - // A cast is safe if its operand is. - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) - return isSafe(C->getOperand(), L, SE); - - // A udiv is safe if its operands are. - if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) - return isSafe(UD->getLHS(), L, SE) && - isSafe(UD->getRHS(), L, SE); - - // SCEVUnknown is always safe. - if (isa<SCEVUnknown>(S)) - return true; - - // Nothing else is safe. - return false; -} - -void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - // Rewrite all induction variable expressions in terms of the canonical - // induction variable. - // - // If there were induction variables of other sizes or offsets, manually - // add the offsets to the primary induction variable and cast, avoiding - // the need for the code evaluation methods to insert induction variables - // of different sizes. - for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. - if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (SE->isLoopInvariant(ExitVal, L)) - AR = ExitVal; - } - - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!isSafe(AR, L, SE)) - continue; - - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = getInsertPointForUses(User, Op, DT); - - // Now expand it into actual Instructions and patch it into place. 
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - - if (!isValidRewrite(Op, NewVal)) { - DeadInsts.push_back(NewVal); - continue; - } - // Inform ScalarEvolution that this value is changing. The change doesn't - // affect its value, but it does potentially affect which use lists the - // value will be on after the replacement, which affects ScalarEvolution's - // ability to walk use lists and drop dangling pointers when a value is - // deleted. - SE->forgetValue(User); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - if (Instruction *NewValI = dyn_cast<Instruction>(NewVal)) - NewValI->setDebugLoc(User->getDebugLoc()); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } -} - -//===----------------------------------------------------------------------===// // IV Widening - Extend the width of an IV to cover its widest uses. //===----------------------------------------------------------------------===// @@ -1262,9 +1122,6 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, } } - if (EnableIVRewrite) - return false; - // Recurse past add expressions, which commonly occur in the // BackedgeTakenCount. They may already exist in program code, and if not, // they are not too expensive rematerialize. @@ -1321,36 +1178,6 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { return true; } -/// getBackedgeIVType - Get the widest type used by the loop test after peeking -/// through Truncs. -/// -/// TODO: Unnecessary when ForceLFTR is removed. -static Type *getBackedgeIVType(Loop *L) { - if (!L->getExitingBlock()) - return 0; - - // Can't rewrite non-branch yet. - BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); - if (!BI) - return 0; - - ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); - if (!Cond) - return 0; - - Type *Ty = 0; - for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); - OI != OE; ++OI) { - assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); - TruncInst *Trunc = dyn_cast<TruncInst>(*OI); - if (!Trunc) - continue; - - return Trunc->getSrcTy(); - } - return Ty; -} - /// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop /// invariant value to the phi. static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { @@ -1619,8 +1446,7 @@ LinearFunctionTestReplace(Loop *L, // LFTR can ignore IV overflow and truncate to the width of // BECount. This avoids materializing the add(zext(add)) expression. - Type *CntTy = !EnableIVRewrite ? - BackedgeTakenCount->getType() : IndVar->getType(); + Type *CntTy = BackedgeTakenCount->getType(); const SCEV *IVCount = BackedgeTakenCount; @@ -1805,8 +1631,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!L->isLoopSimplifyForm()) return false; - if (EnableIVRewrite) - IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); SE = &getAnalysis<ScalarEvolution>(); DT = &getAnalysis<DominatorTree>(); @@ -1833,10 +1657,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // attempt to avoid evaluating SCEVs for sign/zero extend operations until // other expressions involving loop IVs have been evaluated. This helps SCEV // set no-wrap flags before normalizing sign/zero extension. 
- if (!EnableIVRewrite) { - Rewriter.disableCanonicalMode(); - SimplifyAndExtend(L, Rewriter, LPM); - } + Rewriter.disableCanonicalMode(); + SimplifyAndExtend(L, Rewriter, LPM); // Check to see if this loop has a computable loop-invariant execution count. // If so, this means that we can compute the final value of any expressions @@ -1847,106 +1669,28 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) RewriteLoopExitValues(L, Rewriter); - // Eliminate redundant IV users. - if (EnableIVRewrite) - Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts); - // Eliminate redundant IV cycles. - if (!EnableIVRewrite) - NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - - // Compute the type of the largest recurrence expression, and decide whether - // a canonical induction variable should be inserted. - Type *LargestType = 0; - bool NeedCannIV = false; - bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); - if (EnableIVRewrite && ExpandBECount) { - // If we have a known trip count and a single exit block, we'll be - // rewriting the loop exit test condition below, which requires a - // canonical induction variable. - NeedCannIV = true; - Type *Ty = BackedgeTakenCount->getType(); - if (!EnableIVRewrite) { - // In this mode, SimplifyIVUsers may have already widened the IV used by - // the backedge test and inserted a Trunc on the compare's operand. Get - // the wider type to avoid creating a redundant narrow IV only used by the - // loop test. - LargestType = getBackedgeIVType(L); - } - if (!LargestType || - SE->getTypeSizeInBits(Ty) > - SE->getTypeSizeInBits(LargestType)) - LargestType = SE->getEffectiveSCEVType(Ty); - } - if (EnableIVRewrite) { - for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { - NeedCannIV = true; - Type *Ty = - SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); - if (!LargestType || - SE->getTypeSizeInBits(Ty) > - SE->getTypeSizeInBits(LargestType)) - LargestType = Ty; - } - } - - // Now that we know the largest of the induction variable expressions - // in this loop, insert a canonical induction variable of the largest size. - PHINode *IndVar = 0; - if (NeedCannIV) { - // Check to see if the loop already has any canonical-looking induction - // variables. If any are present and wider than the planned canonical - // induction variable, temporarily remove them, so that the Rewriter - // doesn't attempt to reuse them. - SmallVector<PHINode *, 2> OldCannIVs; - while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { - if (SE->getTypeSizeInBits(OldCannIV->getType()) > - SE->getTypeSizeInBits(LargestType)) - OldCannIV->removeFromParent(); - else - break; - OldCannIVs.push_back(OldCannIV); - } - - IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); - - ++NumInserted; - Changed = true; - DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); + NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - // Now that the official induction variable is established, reinsert - // any old canonical-looking variables after it so that the IR remains - // consistent. They will be deleted as part of the dead-PHI deletion at - // the end of the pass. 
- while (!OldCannIVs.empty()) { - PHINode *OldCannIV = OldCannIVs.pop_back_val(); - OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt()); - } - } - else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) { - IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); - } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. - Value *NewICmp = 0; - if (ExpandBECount && IndVar) { - // Check preconditions for proper SCEVExpander operation. SCEV does not - // express SCEVExpander's dependencies, such as LoopSimplify. Instead any - // pass that uses the SCEVExpander must do it. This does not work well for - // loop passes because SCEVExpander makes assumptions about all loops, while - // LoopPassManager only forces the current loop to be simplified. - // - // FIXME: SCEV expansion has no way to bail out, so the caller must - // explicitly check any assumptions made by SCEV. Brittle. - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); - if (!AR || AR->getLoop()->getLoopPreheader()) - NewICmp = - LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); + if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) { + PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD); + if (IndVar) { + // Check preconditions for proper SCEVExpander operation. SCEV does not + // express SCEVExpander's dependencies, such as LoopSimplify. Instead any + // pass that uses the SCEVExpander must do it. This does not work well for + // loop passes because SCEVExpander makes assumptions about all loops, while + // LoopPassManager only forces the current loop to be simplified. + // + // FIXME: SCEV expansion has no way to bail out, so the caller must + // explicitly check any assumptions made by SCEV. Brittle. + const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount); + if (!AR || AR->getLoop()->getLoopPreheader()) + (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + Rewriter); + } } - // Rewrite IV-derived expressions. - if (EnableIVRewrite) - RewriteIVExpressions(L, Rewriter); - // Clear the rewriter cache, because values that are in the rewriter's cache // can be deleted in the loop below, causing the AssertingVH in the cache to // trigger. @@ -1965,16 +1709,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // loop may be sunk below the loop to reduce register pressure. SinkUnusedInvariants(L); - // For completeness, inform IVUsers of the IV use in the newly-created - // loop exit test instruction. - if (IU && NewICmp) { - ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp); - if (NewICmpInst) { - SmallPtrSet<Loop*, 16> SimplifiedLoopNests; - IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)), - SimplifiedLoopNests); - } - } // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader()); // Check a post-condition. @@ -1984,8 +1718,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. 
#ifndef NDEBUG - if (!EnableIVRewrite && VerifyIndvars && - !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { + if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { SE->forgetLoop(L); const SCEV *NewBECount = SE->getBackedgeTakenCount(L); if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) < diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 82d918e..fe4700b 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -77,11 +77,11 @@ #include <algorithm> using namespace llvm; -static cl::opt<bool> EnableNested( - "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops")); - -static cl::opt<bool> EnableRetry( - "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry")); +/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to +/// bail out. This threshold is far beyond the number of users that LSR can +/// conceivably solve, so it should not affect generated code, but catches the +/// worst cases before LSR burns too much compile time and stack space. +static const unsigned MaxIVUsers = 200; // Temporary flag to clean up congruent phis after LSR phi expansion. // It's currently disabled until we can determine whether it's truly useful or @@ -710,8 +710,9 @@ static bool isHighCostExpansion(const SCEV *S, Value *UVal = U->getValue(); for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end(); UI != UE; ++UI) { - Instruction *User = cast<Instruction>(*UI); - if (User->getOpcode() == Instruction::Mul + // If U is a constant, it may be used by a ConstantExpr. + Instruction *User = dyn_cast<Instruction>(*UI); + if (User && User->getOpcode() == Instruction::Mul && SE.isSCEVable(User->getType())) { return SE.getSCEV(User) == Mul; } @@ -824,36 +825,20 @@ void Cost::RateRegister(const SCEV *Reg, const Loop *L, ScalarEvolution &SE, DominatorTree &DT) { if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { - if (AR->getLoop() == L) - AddRecCost += 1; /// TODO: This should be a function of the stride. - // If this is an addrec for another loop, don't second-guess its addrec phi // nodes. LSR isn't currently smart enough to reason about more than one - // loop at a time. LSR has either already run on inner loops, will not run - // on other loops, and cannot be expected to change sibling loops. If the - // AddRec exists, consider it's register free and leave it alone. Otherwise, - // do not consider this formula at all. - else if (!EnableNested || L->contains(AR->getLoop()) || - (!AR->getLoop()->contains(L) && - DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + // loop at a time. LSR has already run on inner loops, will not run on outer + // loops, and cannot be expected to change sibling loops. + if (AR->getLoop() != L) { + // If the AddRec exists, consider its register free and leave it alone. if (isExistingPhi(AR, SE)) return; - // For !EnableNested, never rewrite IVs in other loops. - if (!EnableNested) { - Loose(); - return; - } - // If this isn't one of the addrecs that the loop already has, it - // would require a costly new phi and add. TODO: This isn't - // precisely modeled right now. - ++NumBaseAdds; - if (!Regs.count(AR->getStart())) { - RateRegister(AR->getStart(), Regs, L, SE, DT); - if (isLoser()) - return; - } + // Otherwise, do not consider this formula at all. + Loose(); + return; } + AddRecCost += 1; /// TODO: This should be a function of the stride.
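One subtlety in the isHighCostExpansion hunk above is easy to miss: a use of a constant Value can be a ConstantExpr rather than an Instruction, so the old unconditional cast<Instruction> could assert. A small sketch of the corrected pattern, with era-appropriate headers assumed:

#include "llvm/Instruction.h"
#include "llvm/Value.h"
using namespace llvm;

void visitInstructionUsers(Value *UVal) {
  for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
       UI != UE; ++UI) {
    // dyn_cast yields null for non-Instruction users such as ConstantExpr,
    // where cast<Instruction> would have asserted.
    if (Instruction *User = dyn_cast<Instruction>(*UI)) {
      (void)User; // ... inspect the instruction here ...
    }
  }
}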
// Add the step value register, if it needs one. // TODO: The non-affine case isn't precisely modeled here. @@ -1303,10 +1288,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM, // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. if (AM.BaseOffs != 0) { - if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs); - return false; + if (!TLI) + return false; + // We have one of: + // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset + // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset + // Offs is the ICmp immediate. + int64_t Offs = AM.BaseOffs; + if (AM.Scale == 0) + Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. + return TLI->isLegalICmpImmediate(Offs); } + // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg return true; case LSRUse::Basic: @@ -2193,7 +2187,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { do { const SCEV *S = Worklist.pop_back_val(); if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { - if (EnableNested || AR->getLoop() == L) + if (AR->getLoop() == L) Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { @@ -2463,7 +2457,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, if (!isCompatibleIVType(PrevIV, NextIV)) continue; - // A phi nodes terminates a chain. + // A phi node terminates a chain. if (isa<PHINode>(UserInst) && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst)) continue; @@ -2519,13 +2513,14 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); + if (!OtherUse || OtherUse == UserInst) + continue; if (SE.isSCEVable(OtherUse->getType()) && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { continue; } - if (OtherUse && OtherUse != UserInst) - NearUsers.insert(OtherUse); + NearUsers.insert(OtherUse); } // Since this user is part of the chain, it's no longer considered a use @@ -3986,24 +3981,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, if (LU.Regs.count(*I)) ReqRegs.insert(*I); - bool AnySatisfiedReqRegs = false; SmallPtrSet<const SCEV *, 16> NewRegs; Cost NewCost; -retry: for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; // Ignore formulae which do not use any of the required registers. + bool SatisfiedReqReg = true; for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), JE = ReqRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; if ((!F.ScaledReg || F.ScaledReg != Reg) && std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == - F.BaseRegs.end()) - goto skip; + F.BaseRegs.end()) { + SatisfiedReqReg = false; + break; + } + } + if (!SatisfiedReqReg) { + // If none of the formulae satisfied the required registers, then we could + // clear ReqRegs and try again. Currently, we simply give up in this case. + continue; } - AnySatisfiedReqRegs = true; // Evaluate the cost of the current formula. If it's already worse than // the current best, prune the search at that point. 
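The comment in the isLegalUse hunk above ("The cast does the right thing with INT64_MIN") deserves unpacking: negating INT64_MIN as a signed value is undefined behavior in C++, while negating through uint64_t is well-defined modular arithmetic that wraps back to the same bit pattern. A standalone demonstration (the conversion back to int64_t assumes a two's-complement host):

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Offs = INT64_MIN;
  // -Offs would be signed overflow; -(uint64_t)Offs wraps to
  // 0x8000000000000000, which converts back to INT64_MIN.
  int64_t Negated = (int64_t)(-(uint64_t)Offs);
  std::printf("%lld\n", (long long)Negated); // prints -9223372036854775808
  return 0;
}

isLegalICmpImmediate then judges the still-out-of-range immediate normally, instead of the computation itself invoking undefined behavior on the way there.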
@@ -4030,18 +4030,6 @@ retry: } Workspace.pop_back(); } - skip:; - } - - if (!EnableRetry && !AnySatisfiedReqRegs) - return; - - // If none of the formulae had all of the required registers, relax the - // constraint so that we don't exclude all formulae. - if (!AnySatisfiedReqRegs) { - assert(!ReqRegs.empty() && "Solver failed even without required registers"); - ReqRegs.clear(); - goto retry; } } @@ -4537,6 +4525,17 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // If there's no interesting work to be done, bail early. if (IU.empty()) return; + // If there's too much analysis to be done, bail early. We won't be able to + // model the problem anyway. + unsigned NumUsers = 0; + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + if (++NumUsers > MaxIVUsers) { + DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L + << "\n"); + return; + } + } + #ifndef NDEBUG // All dominating loops must have preheaders, or SCEVExpander may not be able // to materialize an AddRecExpr whose Start is an outer AddRecExpr. @@ -4566,7 +4565,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) if (IU.empty()) return; // Skip nested loops until we can model them better with formulae. - if (!EnableNested && !L->empty()) { + if (!L->empty()) { DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); return; } diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 22dbfe3..09a186f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -197,13 +197,13 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } if (TripCount) { // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) Count--; } else if (UnrollRuntime) { // Reduce unroll count to be a lower power-of-two value - while (Count != 0 && Size > CurrentThreshold) { + while (Count != 0 && Size > Threshold) { Count >>= 1; Size = LoopSize*Count; } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 053eb0c..00ecc74 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -64,63 +64,63 @@ STATISTIC(TotalInsts, "Total number of instructions analyzed"); static cl::opt<unsigned> Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); - + namespace { - + class LUAnalysisCache { typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> > UnswitchedValsMap; - + typedef UnswitchedValsMap::iterator UnswitchedValsIt; - + struct LoopProperties { unsigned CanBeUnswitchedCount; unsigned SizeEstimation; UnswitchedValsMap UnswitchedVals; }; - - // Here we use std::map instead of DenseMap, since we need to keep valid + + // Here we use std::map instead of DenseMap, since we need to keep valid // LoopProperties pointer for current loop for better performance. typedef std::map<const Loop*, LoopProperties> LoopPropsMap; typedef LoopPropsMap::iterator LoopPropsMapIt; - + LoopPropsMap LoopsProperties; UnswitchedValsMap* CurLoopInstructions; LoopProperties* CurrentLoopProperties; - + // Max size of code we can produce on remained iterations. unsigned MaxSize; - + public: - + LUAnalysisCache() : CurLoopInstructions(NULL), CurrentLoopProperties(NULL), MaxSize(Threshold) {} - + // Analyze loop. 
Check its size and determine whether it is possible to unswitch // it. Returns true if we can unswitch this loop. bool countLoop(const Loop* L); - + // Clean all data related to the given loop. void forgetLoop(const Loop* L); - + // Mark case value as unswitched. // Since SI instruction can be partly unswitched, in order to avoid // extra unswitching in cloned loops keep track of all unswitched values. void setUnswitched(const SwitchInst* SI, const Value* V); - + // Check whether this case value was unswitched before or not. bool isUnswitched(const SwitchInst* SI, const Value* V); - + // Clone all loop-unswitch related loop properties. // Redistribute unswitching quotas. // Note that new loop data is stored inside the VMap. void cloneData(const Loop* NewLoop, const Loop* OldLoop, const ValueToValueMapTy& VMap); }; - + class LoopUnswitch : public LoopPass { LoopInfo *LI; // Loop information LPPassManager *LPM; @@ -130,7 +130,7 @@ namespace { std::vector<Loop*> LoopProcessWorklist; LUAnalysisCache BranchesInfo; - + bool OptimizeForSize; bool redoLoop; @@ -138,9 +138,9 @@ namespace { DominatorTree *DT; BasicBlock *loopHeader; BasicBlock *loopPreheader; - + // LoopBlocks contains all of the basic blocks of the loop, including the - // preheader of the loop, the body of the loop, and the exit blocks of the + // preheader of the loop, the body of the loop, and the exit blocks of the // loop, in that order. std::vector<BasicBlock*> LoopBlocks; // NewBlocks contains cloned copies of the basic blocks from LoopBlocks. @@ -148,8 +148,8 @@ public: static char ID; // Pass ID, replacement for typeid - explicit LoopUnswitch(bool Os = false) : - LoopPass(ID), OptimizeForSize(Os), redoLoop(false), + explicit LoopUnswitch(bool Os = false) : + LoopPass(ID), OptimizeForSize(Os), redoLoop(false), currentLoop(NULL), DT(NULL), loopHeader(NULL), loopPreheader(NULL) { initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); @@ -186,7 +186,7 @@ namespace { if (I != LoopProcessWorklist.end()) LoopProcessWorklist.erase(I); } - + void initLoopData() { loopHeader = currentLoop->getHeader(); loopPreheader = currentLoop->getLoopPreheader(); @@ -205,7 +205,7 @@ namespace { Constant *Val, bool isEqual); void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, - BasicBlock *TrueDest, + BasicBlock *TrueDest, BasicBlock *FalseDest, Instruction *InsertPt); @@ -222,12 +222,12 @@ namespace { // Analyze loop. Check its size and determine whether it is possible to unswitch // it. Returns true if we can unswitch this loop. bool LUAnalysisCache::countLoop(const Loop* L) { - + std::pair<LoopPropsMapIt, bool> InsertRes = LoopsProperties.insert(std::make_pair(L, LoopProperties())); - + LoopProperties& Props = InsertRes.first->second; - + if (InsertRes.second) { // New loop. @@ -235,39 +235,39 @@ bool LUAnalysisCache::countLoop(const Loop* L) { // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. - + // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops.
CodeMetrics Metrics; - for (Loop::block_iterator I = L->block_begin(), + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; - } - + } + if (!Props.CanBeUnswitchedCount) { DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() << ", cost too high: " << L->getBlocks().size() << "\n"); - + return false; } - + // Be careful. These links are good only before a new loop is added. CurrentLoopProperties = &Props; CurLoopInstructions = &Props.UnswitchedVals; - + return true; } // Clean all data related to the given loop. void LUAnalysisCache::forgetLoop(const Loop* L) { - + LoopPropsMapIt LIt = LoopsProperties.find(L); if (LIt != LoopsProperties.end()) { @@ -275,9 +275,9 @@ void LUAnalysisCache::forgetLoop(const Loop* L) { LoopProperties& Props = LIt->second; MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation; LoopsProperties.erase(LIt); } - + CurrentLoopProperties = NULL; - CurLoopInstructions = NULL; + CurLoopInstructions = NULL; } // Mark case value as unswitched. @@ -289,7 +289,7 @@ void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) { // Check whether this case value was unswitched before or not. bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) { - return (*CurLoopInstructions)[SI].count(V); + return (*CurLoopInstructions)[SI].count(V); } // Clone all loop-unswitch related loop properties. @@ -297,20 +297,20 @@ bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) { // Note that new loop data is stored inside the VMap. void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, const ValueToValueMapTy& VMap) { - + LoopProperties& NewLoopProps = LoopsProperties[NewLoop]; LoopProperties& OldLoopProps = *CurrentLoopProperties; UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals; - + // Reallocate "can-be-unswitched quota" --OldLoopProps.CanBeUnswitchedCount; unsigned Quota = OldLoopProps.CanBeUnswitchedCount; NewLoopProps.CanBeUnswitchedCount = Quota / 2; OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; - + NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; - + // Clone unswitched values info: // for new loop switches we clone info about values that were // already unswitched and have redundant successors. @@ -319,7 +319,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, Value* NewI = VMap.lookup(OldInst); const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI); assert(NewInst && "All instructions that are in SrcBB must be in VMap."); - + NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; } } @@ -333,18 +333,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) -Pass *llvm::createLoopUnswitchPass(bool Os) { - return new LoopUnswitch(Os); +Pass *llvm::createLoopUnswitchPass(bool Os) { + return new LoopUnswitch(Os); } /// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is /// invariant in the loop, or has an invariant piece, return the invariant. /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { - + // We started analyzing a new instruction; increment the scanned instruction counter. ++TotalInsts; - + // We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy()) return 0; @@ -369,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed)) return RHS; } - + return 0; } @@ -394,19 +394,36 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { return Changed; } -/// processCurrentLoop - Do actual work and unswitch loop if possible +/// processCurrentLoop - Do actual work and unswitch loop if possible /// and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; initLoopData(); - + // If LoopSimplify was unable to form a preheader, don't do any unswitching. if (!loopPreheader) return false; - + + // Loops with indirectbr cannot be cloned. + if (!currentLoop->isSafeToClone()) + return false; + + // Loops with invokes, whose unwind edge escapes the loop, cannot be + // unswitched because splitting their edges is non-trivial and doesn't preserve + // loop simplify information. + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); I != E; ++I) + if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) + if (!currentLoop->contains(II->getUnwindDest())) + return false; + + // Without dedicated exits, splitting the exit edge may fail. + if (!currentLoop->hasDedicatedExits()) + return false; + LLVMContext &Context = loopHeader->getContext(); - + // We may have reached the quota of branches for this loop. If so, // stop unswitching. if (!BranchesInfo.countLoop(currentLoop)) @@ -415,7 +432,7 @@ bool LoopUnswitch::processCurrentLoop() { // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. - for (Loop::block_iterator I = currentLoop->block_begin(), + for (Loop::block_iterator I = currentLoop->block_begin(), E = currentLoop->block_end(); I != E; ++I) { TerminatorInst *TI = (*I)->getTerminator(); if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { @@ -424,24 +441,24 @@ bool LoopUnswitch::processCurrentLoop() { if (BI->isConditional()) { // See if this, or some part of it, is loop invariant. If so, we can // unswitch on it if we desire. - Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), + Value *LoopCond = FindLIVLoopCondition(BI->getCondition(), currentLoop, Changed); - if (LoopCond && UnswitchIfProfitable(LoopCond, + if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { ++NumBranches; return true; } - } + } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { - Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); - unsigned NumCases = SI->getNumCases(); + unsigned NumCases = SI->getNumCases(); if (LoopCond && NumCases) { // Find a value to unswitch on: // FIXME: this should choose the most expensive case! // FIXME: scan for a case with a non-critical edge? Constant *UnswitchVal = NULL; - + // Do not process the same value again and again. // At this point we have some cases already unswitched and // some not yet unswitched. Let's find the first not yet unswitched one. @@ -453,7 +470,7 @@ bool LoopUnswitch::processCurrentLoop() { break; } } - + if (!UnswitchVal) continue; @@ -463,14 +480,14 @@ bool LoopUnswitch::processCurrentLoop() { } } } - + // Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); + for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); BBI != E; ++BBI) if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { - Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); - if (LoopCond && UnswitchIfProfitable(LoopCond, + if (LoopCond && UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { ++NumSelects; return true; @@ -500,7 +517,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, ExitBB = BB; return true; } - + // Otherwise, this is an unvisited intra-loop node. Check all successors. for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { // Check to see if the successor is a trivial loop exit. @@ -513,12 +530,12 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (I->mayHaveSideEffects()) return false; - + return true; } /// isTrivialLoopExitBlock - Return true if the specified block unconditionally -/// leads to an exit from the specified loop, and has no side-effects in the +/// leads to an exit from the specified loop, and has no side-effects in the /// process. If so, return the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set<BasicBlock*> Visited; @@ -546,39 +563,39 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, BasicBlock *Header = currentLoop->getHeader(); TerminatorInst *HeaderTerm = Header->getTerminator(); LLVMContext &Context = Header->getContext(); - + BasicBlock *LoopExitBB = 0; if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { // If the header block doesn't end with a conditional branch on Cond, we // can't handle it. if (!BI->isConditional() || BI->getCondition() != Cond) return false; - - // Check to see if a successor of the branch is guaranteed to - // exit through a unique exit block without having any + + // Check to see if a successor of the branch is guaranteed to + // exit through a unique exit block without having any // side-effects. If so, determine the value of Cond that causes it to do // this. - if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(0)))) { if (Val) *Val = ConstantInt::getTrue(Context); - } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, BI->getSuccessor(1)))) { if (Val) *Val = ConstantInt::getFalse(Context); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) { // If this isn't a switch on Cond, we can't handle it. if (SI->getCondition() != Cond) return false; - + // Check to see if a successor of the switch is guaranteed to go to the - // latch block or exit through a one exit block without having any + // latch block or exit through a one exit block without having any // side-effects. If so, determine the value of Cond that causes it to do - // this. + // this. // Note that we can't trivially unswitch on the default case or // on already unswitched cases. 
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock* LoopExitCandidate; - if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, + if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, i.getCaseSuccessor()))) { // Okay, we found a trivial case, remember the value that is trivial. ConstantInt* CaseVal = i.getCaseValue(); @@ -598,9 +615,9 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // contains phi nodes, this isn't trivial. if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) return false; // Can't handle this. - + if (LoopExit) *LoopExit = LoopExitBB; - + // We already know that nothing uses any scalar values defined inside of this // loop. As such, we just have to check to see if this loop will execute any // side-effecting instructions (e.g. stores, calls, volatile loads) in the @@ -686,17 +703,17 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable /// condition in it (a cond branch from its header block to its latch block, -/// where the path through the loop that doesn't execute its body has no +/// where the path through the loop that doesn't execute its body has no /// side-effects), unswitch it. This doesn't involve any code duplication, just /// moving the conditional branch outside of the loop and updating loop info. -void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, - Constant *Val, +void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, + Constant *Val, BasicBlock *ExitBlock) { DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" << loopHeader->getName() << " [" << L->getBlocks().size() << " blocks] in Function " << L->getHeader()->getParent()->getName() << " on cond: " << *Val << " == " << *Cond << "\n"); - + // First step, split the preheader, so that we know that there is a safe place // to insert the conditional branch. We will change loopPreheader to have a // conditional branch on Cond. @@ -705,24 +722,24 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, // Now that we have a place to insert the conditional branch, create a place // to branch to: this is the exit block out of the loop that we should // short-circuit to. - + // Split this block now, so that the loop maintains its exit block, and so // that the jump from the preheader can execute the contents of the exit block // without actually branching to it (the exit block should be dominated by the // loop header, not the preheader). assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this); - - // Okay, now we have a position to branch from and a position to branch to, + + // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. - EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, + EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, loopPreheader->getTerminator()); LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L); loopPreheader->getTerminator()->eraseFromParent(); // We need to reprocess this loop, it could be unswitched again. redoLoop = true; - + // Now that we know that the loop is never entered when this condition is a // particular value, rewrite the loop with this info. We know that this will // at least eliminate the old branch. 
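For intuition about the trivial case handled above: trivial unswitching only moves a loop-invariant exit test from the loop header into the preheader, with no code duplication. A source-level sketch of the effect (illustrative C++ only, not code from this patch):

    // Before: the header re-tests the invariant condition every iteration.
    void before(bool Cond, int *A, int N) {
      for (int i = 0; i < N; ++i) {
        if (Cond)
          break;      // loop-invariant: exits on the first iteration or never
        A[i] = 0;
      }
    }

    // After trivial unswitching: the branch runs once in the preheader and
    // the loop body keeps only the non-exiting path.
    void after(bool Cond, int *A, int N) {
      if (!Cond)
        for (int i = 0; i < N; ++i)
          A[i] = 0;
    }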
@@ -732,7 +749,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, /// SplitExitEdges - Split all of the edges from inside the loop to their exit /// blocks. Update the appropriate Phi nodes as we do so. -void LoopUnswitch::SplitExitEdges(Loop *L, +void LoopUnswitch::SplitExitEdges(Loop *L, const SmallVector<BasicBlock *, 8> &ExitBlocks){ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { @@ -752,10 +769,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L, } } -/// UnswitchNontrivialCondition - We determined that the loop is profitable -/// to unswitch when LIC equal Val. Split it into loop versions and test the +/// UnswitchNontrivialCondition - We determined that the loop is profitable +/// to unswitch when LIC equal Val. Split it into loop versions and test the /// condition outside of either loop. Return the loops created as Out1/Out2. -void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, +void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L) { Function *F = loopHeader->getParent(); DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" @@ -798,7 +815,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); - + NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); @@ -828,7 +845,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // The new exit block should be in the same loop as the old one. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); - + assert(NewExit->getTerminator()->getNumSuccessors() == 1 && "Exit block should have been split to have one successor!"); BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); @@ -863,7 +880,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); - + // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator()); assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && @@ -882,7 +899,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // the condition that we're unswitching on), we don't rewrite the second // iteration. WeakVH LICHandle(LIC); - + // Now we rewrite the original code to know that the condition is true and the // new code to know that the condition is false. RewriteLoopBodyWithConditionConstant(L, LIC, Val, false); @@ -897,7 +914,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, /// RemoveFromWorklist - Remove all instances of I from the worklist vector /// specified. -static void RemoveFromWorklist(Instruction *I, +static void RemoveFromWorklist(Instruction *I, std::vector<Instruction*> &Worklist) { std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(), Worklist.end(), I); @@ -910,7 +927,7 @@ static void RemoveFromWorklist(Instruction *I, /// ReplaceUsesOfWith - When we find that I really equals V, remove I from the /// program, replacing all uses with V and update the worklist. 
-static void ReplaceUsesOfWith(Instruction *I, Value *V, +static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector<Instruction*> &Worklist, Loop *L, LPPassManager *LPM) { DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); @@ -943,10 +960,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, if (BasicBlock *Pred = BB->getSinglePredecessor()) { // If it has one pred, fold phi nodes in BB. while (isa<PHINode>(BB->begin())) - ReplaceUsesOfWith(BB->begin(), - cast<PHINode>(BB->begin())->getIncomingValue(0), + ReplaceUsesOfWith(BB->begin(), + cast<PHINode>(BB->begin())->getIncomingValue(0), Worklist, L, LPM); - + // If this is the header of a loop and the only pred is the latch, we now // have an unreachable loop. if (Loop *L = LI->getLoopFor(BB)) @@ -957,15 +974,15 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); Pred->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), Pred); - + // The loop is now broken, remove it from LI. RemoveLoopFromHierarchy(L); - + // Reprocess the header, which now IS dead. RemoveBlockIfDead(BB, Worklist, L); return; } - + // If pred ends in a uncond branch, add uncond branch to worklist so that // the two blocks will get merged. if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) @@ -976,11 +993,11 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, } DEBUG(dbgs() << "Nuking dead block: " << *BB); - + // Remove the instructions in the basic block from the worklist. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { RemoveFromWorklist(I, Worklist); - + // Anything that uses the instructions in this basic block should have their // uses replaced with undefs. // If I is not void type then replaceAllUsesWith undef. @@ -988,7 +1005,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, if (!I->getType()->isVoidTy()) I->replaceAllUsesWith(UndefValue::get(I->getType())); } - + // If this is the edge to the header block for a loop, remove the loop and // promote all subloops. if (Loop *BBLoop = LI->getLoopFor(BB)) { @@ -1004,8 +1021,8 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, // Remove the block from the loop info, which removes it from any loops it // was in. LI->removeBlock(BB); - - + + // Remove phi node entries in successors for this block. TerminatorInst *TI = BB->getTerminator(); SmallVector<BasicBlock*, 4> Succs; @@ -1013,13 +1030,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, Succs.push_back(TI->getSuccessor(i)); TI->getSuccessor(i)->removePredecessor(BB); } - + // Unique the successors, remove anything with multiple uses. array_pod_sort(Succs.begin(), Succs.end()); Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); - + // Remove the basic block, including all of the instructions contained in it. - LPM->deleteSimpleAnalysisValue(BB, L); + LPM->deleteSimpleAnalysisValue(BB, L); BB->eraseFromParent(); // Remove successor blocks here that are not dead, so that we know we only // have dead blocks in this list. 
Nondead blocks have a way of becoming dead, @@ -1037,7 +1054,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, --i; } } - + for (unsigned i = 0, e = Succs.size(); i != e; ++i) RemoveBlockIfDead(Succs[i], Worklist, L); } @@ -1060,14 +1077,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual) { assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); - + // FIXME: Support correlated properties, like: // for (...) // if (li1 < li2) // ... // if (li1 > li2) // ... - + // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, // selects, switches. std::vector<Instruction*> Worklist; @@ -1082,9 +1099,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, if (IsEqual) Replacement = Val; else - Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), + Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), !cast<ConstantInt>(Val)->getZExtValue()); - + for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end(); UI != E; ++UI) { Instruction *U = dyn_cast<Instruction>(*UI); @@ -1092,15 +1109,15 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, continue; Worklist.push_back(U); } - + for (std::vector<Instruction*>::iterator UI = Worklist.begin(); UI != Worklist.end(); ++UI) - (*UI)->replaceUsesOfWith(LIC, Replacement); - + (*UI)->replaceUsesOfWith(LIC, Replacement); + SimplifyCode(Worklist, L); return; } - + // Otherwise, we don't know the precise value of LIC, but we do know that it // is certainly NOT "Val". As such, simplify any uses in the loop that we // can. This case occurs when we unswitch switch statements. @@ -1112,27 +1129,27 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Worklist.push_back(U); - // TODO: We could do other simplifications, for example, turning + // TODO: We could do other simplifications, for example, turning // 'icmp eq LIC, Val' -> false. // If we know that LIC is not Val, use this info to simplify code. SwitchInst *SI = dyn_cast<SwitchInst>(U); if (SI == 0 || !isa<ConstantInt>(Val)) continue; - + SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val)); // Default case is live for multiple values. if (DeadCase == SI->case_default()) continue; - - // Found a dead case value. Don't remove PHI nodes in the + + // Found a dead case value. Don't remove PHI nodes in the // successor if they become single-entry, those PHI nodes may // be in the Users list. BasicBlock *Switch = SI->getParent(); BasicBlock *SISucc = DeadCase.getCaseSuccessor(); BasicBlock *Latch = L->getLoopLatch(); - + BranchesInfo.setUnswitched(SI, Val); - + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. // If the DeadCase successor dominates the loop latch, then the // transformation isn't safe since it will delete the sole predecessor edge @@ -1172,7 +1189,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, if (DT) DT->addNewBlock(Abort, NewSISucc); } - + SimplifyCode(Worklist, L); } @@ -1193,7 +1210,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { // Simple DCE. if (isInstructionTriviallyDead(I)) { DEBUG(dbgs() << "Remove dead instruction '" << *I); - + // Add uses to the worklist, which may be dead now. 
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) @@ -1225,24 +1242,24 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { if (!SinglePred) continue; // Nothing to do. assert(SinglePred == Pred && "CFG broken"); - DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " + DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- " << Succ->getName() << "\n"); - + // Resolve any single entry PHI nodes in Succ. while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM); - + // If Succ has any successors with PHI nodes, update them to have // entries coming from Pred instead of Succ. Succ->replaceAllUsesWith(Pred); - + // Move all of the successor contents from Succ to Pred. Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), Succ->end()); LPM->deleteSimpleAnalysisValue(BI, L); BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); - + // Remove Succ from the loop tree. LI->removeBlock(Succ); LPM->deleteSimpleAnalysisValue(Succ, L); @@ -1250,7 +1267,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { ++NumSimplify; continue; } - + if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){ // Conditional branch. Turn it into an unconditional branch, then // remove dead blocks. diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 9fdea8d..29234da 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -162,6 +162,7 @@ namespace { IC_MoveWeak, ///< objc_moveWeak (derived) IC_CopyWeak, ///< objc_copyWeak (derived) IC_DestroyWeak, ///< objc_destroyWeak (derived) + IC_StoreStrong, ///< objc_storeStrong (derived) IC_CallOrUser, ///< could call objc_release and/or "use" pointers IC_Call, ///< could call objc_release IC_User, ///< could "use" a pointer @@ -262,6 +263,7 @@ static InstructionClass GetFunctionClass(const Function *F) { return StringSwitch<InstructionClass>(F->getName()) .Case("objc_storeWeak", IC_StoreWeak) .Case("objc_initWeak", IC_InitWeak) + .Case("objc_storeStrong", IC_StoreStrong) .Default(IC_CallOrUser); // Second argument is i8**. if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) @@ -618,22 +620,35 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) { const User *UUser = *UI; // Special - Use by a call (callee or argument) is not considered // to be an escape. - if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser)) - continue; - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) || - isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { - Worklist.push_back(UUser); + switch (GetBasicInstructionClass(UUser)) { + case IC_StoreWeak: + case IC_InitWeak: + case IC_StoreStrong: + case IC_Autorelease: + case IC_AutoreleaseRV: + // These special functions make copies of their pointer arguments. + return true; + case IC_User: + case IC_None: + // Use by an instruction which copies the value is an escape if the + // result is an escape. + if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) || + isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { + Worklist.push_back(UUser); + continue; + } + // Use by a load is not an escape. + if (isa<LoadInst>(UUser)) + continue; + // Use by a store is not an escape if the use is the address. 
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser)) + if (V != SI->getValueOperand()) + continue; + break; + default: + // Regular calls and other stuff are not considered escapes. continue; } - // Use by a load is not an escape. - if (isa<LoadInst>(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast<StoreInst>(UUser)) - if (V != SI->getValueOperand()) - continue; // Otherwise, conservatively assume an escape. return true; } @@ -883,7 +898,7 @@ bool ObjCARCExpand::runOnFunction(Function &F) { // These calls return their argument verbatim, as a low-level // optimization. However, this makes high-level optimizations // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in a later pass. + // emitted here. We'll redo them in the contract pass. Changed = true; Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); break; @@ -997,7 +1012,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) { return false; // Find the llvm.global_ctors variable, as the first step in - // identifying the global constructors. + // identifying the global constructors. In theory, unnecessary autorelease + // pools could occur anywhere, but in practice it's pretty rare. Global + // ctors are a place where autorelease pools get inserted automatically, + // so it's pretty common for them to be unnecessary, and it's pretty + // profitable to eliminate them. GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (!GV) return false; @@ -1014,7 +1033,11 @@ bool ObjCARCAPElim::runOnModule(Module &M) { Value *Op = *OI; // llvm.global_ctors is an array of pairs where the second members // are constructor functions. - Function *F = cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); + Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); + // If the user used a constructor function with the wrong signature and + // it got bitcasted or whatever, look the other way. + if (!F) + continue; // Only look at function definitions. if (F->isDeclaration()) continue; @@ -1678,9 +1701,16 @@ namespace { void CheckForCFGHazards(const BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, BBState &MyStates) const; + bool VisitInstructionBottomUp(Instruction *Inst, + BasicBlock *BB, + MapVector<Value *, RRInfo> &Retains, + BBState &MyStates); bool VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains); + bool VisitInstructionTopDown(Instruction *Inst, + DenseMap<Value *, RRInfo> &Releases, + BBState &MyStates); bool VisitTopDown(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, DenseMap<Value *, RRInfo> &Releases); @@ -1956,6 +1986,7 @@ namespace { /// use here. enum DependenceKind { NeedsPositiveRetainCount, + AutoreleasePoolBoundary, CanChangeRetainCount, RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. @@ -1985,6 +2016,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, } } + case AutoreleasePoolBoundary: { + InstructionClass Class = GetInstructionClass(Inst); + switch (Class) { + case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: + // These mark the end and begin of an autorelease pool scope. + return true; + default: + // Nothing else does this. 
+ return false; + } + } + case CanChangeRetainCount: { InstructionClass Class = GetInstructionClass(Inst); switch (Class) { @@ -2002,6 +2046,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, case RetainAutoreleaseDep: switch (GetBasicInstructionClass(Inst)) { case IC_AutoreleasepoolPop: + case IC_AutoreleasepoolPush: // Don't merge an objc_autorelease with an objc_retain inside a different // autoreleasepool scope. return true; @@ -2136,17 +2181,26 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { /// return true. bool ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call. + // Check for the argument being from an immediately preceding call or invoke. Value *Arg = GetObjCArg(RetainRV); CallSite CS(Arg); - if (Instruction *Call = CS.getInstruction()) + if (Instruction *Call = CS.getInstruction()) { if (Call->getParent() == RetainRV->getParent()) { BasicBlock::iterator I = Call; ++I; while (isNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { + BasicBlock *RetainRVParent = RetainRV->getParent(); + if (II->getNormalDest() == RetainRVParent) { + BasicBlock::iterator I = RetainRVParent->begin(); + while (isNoopInstruction(I)) ++I; + if (&*I == RetainRV) + return false; + } } + } // Check for being preceded by an objc_autoreleaseReturnValue on the same // pointer. In this case, we can delete the pair. @@ -2232,6 +2286,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0))) { + Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), @@ -2247,6 +2302,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { CallInst *CI = cast<CallInst>(Inst); if (isNullOrUndef(CI->getArgOperand(0)) || isNullOrUndef(CI->getArgOperand(1))) { + Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), Constant::getNullValue(Ty), @@ -2360,9 +2416,34 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // Check that there is nothing that cares about the reference // count between the call and the phi. - FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); + switch (Class) { + case IC_Retain: + case IC_RetainBlock: + // These can always be moved up. + break; + case IC_Release: + // These can't be moved across things that care about the retain count. + FindDependencies(NeedsPositiveRetainCount, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_Autorelease: + // These can't be moved across autorelease pool scope boundaries. + FindDependencies(AutoreleasePoolBoundary, Arg, + Inst->getParent(), Inst, + DependingInstructions, Visited, PA); + break; + case IC_RetainRV: + case IC_AutoreleaseRV: + // Don't move these; the RV optimization depends on the autoreleaseRV + // being tail called, and the retainRV being immediately after a call + // (which might still happen if we get lucky with codegen layout, but + // it's not worth taking the chance). 
+ continue; + default: + llvm_unreachable("Invalid dependence flavor"); + } + if (DependingInstructions.size() == 1 && *DependingInstructions.begin() == PN) { Changed = true; @@ -2516,6 +2597,164 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, } bool +ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, + BasicBlock *BB, + MapVector<Value *, RRInfo> &Retains, + BBState &MyStates) { + bool NestingDetected = false; + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + + // If we see two releases in a row on the same pointer, make + // a note, and we'll cycle back to revisit it after we've + // hopefully eliminated the second release, which may allow us to + // eliminate the first release too. + // Theoretically we could implement removal of nested retain+release + // pairs by making PtrState hold a stack of states, but this is + // simple and avoids adding overhead for the non-nested case. + if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + NestingDetected = true; + + S.RRI.clear(); + + MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); + S.RRI.ReleaseMetadata = ReleaseMetadata; + S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + S.RRI.Calls.insert(Inst); + + S.IncrementRefCount(); + S.IncrementNestCount(); + break; + } + case IC_RetainBlock: + // An objc_retainBlock call with just a use may need to be kept, + // because it may be copying a block from the stack to the heap. + if (!IsRetainBlockOptimizable(Inst)) + break; + // FALLTHROUGH + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrBottomUpState(Arg); + S.DecrementRefCount(); + S.SetAtLeastOneRefCount(); + S.DecrementNestCount(); + + switch (S.GetSeq()) { + case S_Stop: + case S_Release: + case S_MovableRelease: + case S_Use: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_CanRelease: + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + Retains[Inst] = S.RRI; + } + S.ClearSequenceProgress(); + break; + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + return NestingDetected; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearBottomUpPointers(); + return NestingDetected; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + return NestingDetected; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), + ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) { + S.DecrementRefCount(); + switch (Seq) { + case S_Use: + S.SetSeq(S_CanRelease); + continue; + case S_CanRelease: + case S_Release: + case S_MovableRelease: + case S_Stop: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + // Check for possible direct uses. + switch (Seq) { + case S_Release: + case S_MovableRelease: + if (CanUse(Inst, Ptr, PA, Class)) { + assert(S.RRI.ReverseInsertPts.empty()); + // If this is an invoke instruction, we're scanning it as part of + // one of its successor blocks, since we can't insert code after it + // in its own block, and we don't want to split critical edges. + if (isa<InvokeInst>(Inst)) + S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); + else + S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); + S.SetSeq(S_Use); + } else if (Seq == S_Release && + (Class == IC_User || Class == IC_CallOrUser)) { + // Non-movable releases depend on any possible objc pointer use. + S.SetSeq(S_Stop); + assert(S.RRI.ReverseInsertPts.empty()); + // As above; handle invoke specially. + if (isa<InvokeInst>(Inst)) + S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); + else + S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); + } + break; + case S_Stop: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_CanRelease: + case S_Use: + case S_None: + break; + case S_Retain: + llvm_unreachable("bottom-up pointer in retain state!"); + } + } + + return NestingDetected; +} + +bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, DenseMap<const BasicBlock *, BBState> &BBStates, MapVector<Value *, RRInfo> &Retains) { @@ -2560,144 +2799,164 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { Instruction *Inst = llvm::prior(I); - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - switch (Class) { - case IC_Release: { - Arg = GetObjCArg(Inst); + // Invoke instructions are visited as part of their successors (below). + if (isa<InvokeInst>(Inst)) + continue; + + NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); + } + + // If there's a predecessor with an invoke, visit the invoke as + // if it were part of this block, since we can't insert code after + // an invoke in its own block, and we don't want to split critical + // edges. + for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) { + BasicBlock *Pred = *PI; + TerminatorInst *PredTI = cast<TerminatorInst>(&Pred->back()); + if (isa<InvokeInst>(PredTI)) + NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates); + } + + return NestingDetected; +} + +bool +ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, + DenseMap<Value *, RRInfo> &Releases, + BBState &MyStates) { + bool NestingDetected = false; + InstructionClass Class = GetInstructionClass(Inst); + const Value *Arg = 0; + + switch (Class) { + case IC_RetainBlock: + // An objc_retainBlock call with just a use may need to be kept, + // because it may be copying a block from the stack to the heap. + if (!IsRetainBlockOptimizable(Inst)) + break; + // FALLTHROUGH + case IC_Retain: + case IC_RetainRV: { + Arg = GetObjCArg(Inst); - PtrState &S = MyStates.getPtrBottomUpState(Arg); + PtrState &S = MyStates.getPtrTopDownState(Arg); - // If we see two releases in a row on the same pointer. 
If so, make + // Don't do retain+release tracking for IC_RetainRV, because it's + // better to let it remain as the first instruction after a call. + if (Class != IC_RetainRV) { + // If we see two retains in a row on the same pointer, make // a note, and we'll cycle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. + // hopefully eliminated the second retain, which may allow us to + // eliminate the first retain too. // Theoretically we could implement removal of nested retain+release // pairs by making PtrState hold a stack of states, but this is // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) + if (S.GetSeq() == S_Retain) NestingDetected = true; + S.SetSeq(S_Retain); S.RRI.clear(); - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release); - S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + S.RRI.IsRetainBlock = Class == IC_RetainBlock; + // Don't check S.IsKnownIncremented() here because it's not + // sufficient. + S.RRI.KnownSafe = S.IsKnownNested(); S.RRI.Calls.insert(Inst); + } - S.IncrementRefCount(); - S.IncrementNestCount(); + S.SetAtLeastOneRefCount(); + S.IncrementRefCount(); + S.IncrementNestCount(); + return NestingDetected; + } + case IC_Release: { + Arg = GetObjCArg(Inst); + + PtrState &S = MyStates.getPtrTopDownState(Arg); + S.DecrementRefCount(); + S.DecrementNestCount(); + + switch (S.GetSeq()) { + case S_Retain: + case S_CanRelease: + S.RRI.ReverseInsertPts.clear(); + // FALL THROUGH + case S_Use: + S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); + S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); + Releases[Inst] = S.RRI; + S.ClearSequenceProgress(); + break; + case S_None: break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); } + break; + } + case IC_AutoreleasepoolPop: + // Conservatively, clear MyStates for all known pointers. + MyStates.clearTopDownPointers(); + return NestingDetected; + case IC_AutoreleasepoolPush: + case IC_None: + // These are irrelevant. + return NestingDetected; + default: + break; + } + + // Consider any other possible effects of this instruction on each + // pointer being tracked. + for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), + ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { + const Value *Ptr = MI->first; + if (Ptr == Arg) + continue; // Handled above. + PtrState &S = MI->second; + Sequence Seq = S.GetSeq(); + + // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) { S.DecrementRefCount(); - S.SetAtLeastOneRefCount(); - S.DecrementNestCount(); + switch (Seq) { + case S_Retain: + S.SetSeq(S_CanRelease); + assert(S.RRI.ReverseInsertPts.empty()); + S.RRI.ReverseInsertPts.insert(Inst); - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: + // One call can't cause a transition from S_Retain to S_CanRelease + // and S_CanRelease to S_Use. If we've made the first transition, + // we're done. + continue; case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; case S_None: break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - continue; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - continue; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - continue; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { + case S_Stop: case S_Release: case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); + llvm_unreachable("top-down pointer in release state!"); } } + + // Check for possible direct uses. + switch (Seq) { + case S_CanRelease: + if (CanUse(Inst, Ptr, PA, Class)) + S.SetSeq(S_Use); + break; + case S_Retain: + case S_Use: + case S_None: + break; + case S_Stop: + case S_Release: + case S_MovableRelease: + llvm_unreachable("top-down pointer in release state!"); + } } return NestingDetected; @@ -2751,138 +3010,7 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. 
- if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Retain) - NestingDetected = true; - - S.SetSeq(S_Retain); - S.RRI.clear(); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not - // sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); - S.RRI.Calls.insert(Inst); - } - - S.SetAtLeastOneRefCount(); - S.IncrementRefCount(); - S.IncrementNestCount(); - continue; - } - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementRefCount(); - S.DecrementNestCount(); - - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - continue; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - continue; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.DecrementRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - // Check for possible direct uses. 
- switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } + NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } CheckForCFGHazards(BB, BBStates, MyStates); @@ -3032,35 +3160,17 @@ void ObjCARCOpt::MoveCalls(Value *Arg, for (SmallPtrSet<Instruction *, 2>::const_iterator PI = RetainsToMove.ReverseInsertPts.begin(), PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *LastUse = *PI; - Instruction *InsertPts[] = { 0, 0, 0 }; - if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) { - // We can't insert code immediately after an invoke instruction, so - // insert code at the beginning of both successor blocks instead. - // The invoke's return value isn't available in the unwind block, - // but our releases will never depend on it, because they must be - // paired with retains from before the invoke. - InsertPts[0] = II->getNormalDest()->getFirstInsertionPt(); - if (!II->getMetadata(NoObjCARCExceptionsMDKind)) - InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt(); - } else { - // Insert code immediately after the last use. - InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse)); - } - - for (Instruction **I = InsertPts; *I; ++I) { - Instruction *InsertPt = *I; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, - "", InsertPt); - // Attach a clang.imprecise_release metadata tag, if appropriate. - if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); - Call->setDoesNotThrow(); - if (ReleasesToMove.IsTailCallRelease) - Call->setTailCall(); - } + Instruction *InsertPt = *PI; + Value *MyArg = ArgTy == ParamTy ? Arg : + new BitCastInst(Arg, ParamTy, "", InsertPt); + CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, + "", InsertPt); + // Attach a clang.imprecise_release metadata tag, if appropriate. + if (MDNode *M = ReleasesToMove.ReleaseMetadata) + Call->setMetadata(ImpreciseReleaseMDKind, M); + Call->setDoesNotThrow(); + if (ReleasesToMove.IsTailCallRelease) + Call->setTailCall(); } // Delete the original retain and release calls. @@ -3080,6 +3190,8 @@ void ObjCARCOpt::MoveCalls(Value *Arg, } } +/// PerformCodePlacement - Identify pairings between the retains and releases, +/// and delete and/or move them. bool ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, @@ -3093,6 +3205,7 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> SmallVector<Instruction *, 4> NewReleases; SmallVector<Instruction *, 8> DeadInsts; + // Visit each retain. for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), E = Retains.end(); I != E; ++I) { Value *V = I->first; @@ -3566,6 +3679,7 @@ bool ObjCARCOpt::doInitialization(Module &M) { if (!EnableARCOpts) return false; + // If nothing in the Module uses ARC, don't do anything. Run = ModuleHasARC(M); if (!Run) return false; @@ -3900,6 +4014,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, } bool ObjCARCContract::doInitialization(Module &M) { + // If nothing in the Module uses ARC, don't do anything. 
Run = ModuleHasARC(M); if (!Run) return false; @@ -3975,6 +4090,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { --BBI; while (isNoopInstruction(BBI)) --BBI; if (&*BBI == GetObjCArg(Inst)) { + Changed = true; InlineAsm *IA = InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), /*isVarArg=*/false), @@ -4024,16 +4140,19 @@ bool ObjCARCContract::runOnFunction(Function &F) { Use &U = UI.getUse(); unsigned OperandNo = UI.getOperandNo(); ++UI; // Increment UI now, because we may unlink its element. - Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); - if (!UserInst) - continue; - // FIXME: dominates should return true for unreachable UserInst. - if (!DT->isReachableFromEntry(UserInst->getParent()) || - DT->dominates(Inst, UserInst)) { + + // If the call's return value dominates a use of the call's argument + // value, rewrite the use to use the return value. We check for + // reachability here because an unreachable call is considered to + // trivially dominate itself, which would lead us to rewriting its + // argument in terms of its return value, which would lead to + // infinite loops in GetObjCArg. + if (DT->isReachableFromEntry(U) && + DT->dominates(Inst, U)) { Changed = true; Instruction *Replacement = Inst; Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) { + if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { // For PHI nodes, insert the bitcast in the predecessor block. unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); @@ -4042,6 +4161,9 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (Replacement->getType() != UseTy) Replacement = new BitCastInst(Replacement, UseTy, "", &BB->back()); + // While we're here, rewrite all edges for this PHI, rather + // than just one use at a time, to minimize the number of + // bitcasts we emit. for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) if (PHI->getIncomingBlock(i) == BB) { @@ -4054,7 +4176,8 @@ bool ObjCARCContract::runOnFunction(Function &F) { } } else { if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", UserInst); + Replacement = new BitCastInst(Replacement, UseTy, "", + cast<Instruction>(U.getUser())); U.set(Replacement); } } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 8f98a5b..cb408a1 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -74,7 +74,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) { namespace { class Reassociate : public FunctionPass { DenseMap<BasicBlock*, unsigned> RankMap; - DenseMap<AssertingVH<>, unsigned> ValueRankMap; + DenseMap<AssertingVH<Value>, unsigned> ValueRankMap; SmallVector<WeakVH, 8> RedoInsts; SmallVector<WeakVH, 8> DeadInsts; bool MadeChange; @@ -210,7 +210,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { /// LowerNegateToMultiply - Replace 0-X with X*-1. /// static Instruction *LowerNegateToMultiply(Instruction *Neg, - DenseMap<AssertingVH<>, unsigned> &ValueRankMap) { + DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) { Constant *Cst = Constant::getAllOnesValue(Neg->getType()); Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg); @@ -492,7 +492,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) { /// only used by an add, transform this into (X+(0-Y)) to promote better /// reassociation. 
 static Instruction *BreakUpSubtract(Instruction *Sub,
-                         DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
@@ -517,8 +517,8 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
 /// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
 /// by one, change this into a multiply by a constant to assist with further
 /// reassociation.
-static Instruction *ConvertShiftToMul(Instruction *Shl,
-                         DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+                         DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
   // If an operand of this shift is a reassociable multiply, or if the shift
   // is used by a reassociable multiply or add, turn into a multiply.
   if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 5ce82b9..16b64a5 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1925,8 +1925,8 @@ bool IPSCCP::runOnModule(Module &M) {
       ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
   }
 
-  // If we inferred constant or undef values for globals variables, we can delete
-  // the global and any stores that remain to it.
+  // If we inferred constant or undef values for global variables, we can
+  // delete the global and any stores that remain to it.
   const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
   for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
        E = TG.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index d36a18f..026fea1 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -13,7 +13,7 @@
 // each member (if possible).  Then, if possible, it transforms the individual
 // alloca instructions into nice clean scalar SSA form.
 //
-// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
 // often interact, especially for C++ programs.  As such, iterating between
 // SRoA, then Mem2Reg until we run out of things to promote works well.
 //
@@ -574,8 +574,8 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
     // transform it into a store of the expanded constant value.
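Stepping back to the two Reassociate helpers above: both rewrites are plain algebraic identities, shown here in scalar C++ purely for illustration (not the pass's IR-level code):

    // BreakUpSubtract:  A - B  ==>  A + (0 - B), so the subtraction becomes
    // an add of a negation and can be commuted with neighboring adds.
    long breakUpSubtract(long A, long B) { return A + (0 - B); }

    // ConvertShiftToMul:  X << 3  ==>  X * 8, so a shift by a constant can
    // participate in multiply reassociation.
    long convertShiftToMul(long X) { return X * (1L << 3); }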
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { assert(MSI->getRawDest() == Ptr && "Consistency error!"); - signed SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue(); - if (SNumBytes > 0) { + int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue(); + if (SNumBytes > 0 && (SNumBytes >> 32) == 0) { unsigned NumBytes = static_cast<unsigned>(SNumBytes); unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue(); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9c49ec1..f7b6941 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1583,21 +1583,16 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; -#ifdef HAVE_FLOORF - Optimizations["floor"] = &UnaryDoubleFP; -#endif -#ifdef HAVE_CEILF - Optimizations["ceil"] = &UnaryDoubleFP; -#endif -#ifdef HAVE_ROUNDF - Optimizations["round"] = &UnaryDoubleFP; -#endif -#ifdef HAVE_RINTF - Optimizations["rint"] = &UnaryDoubleFP; -#endif -#ifdef HAVE_NEARBYINTF - Optimizations["nearbyint"] = &UnaryDoubleFP; -#endif + if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) + Optimizations["floor"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) + Optimizations["ceil"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) + Optimizations["round"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) + Optimizations["rint"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) + Optimizations["nearbyint"] = &UnaryDoubleFP; // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 1b28c35..20052a4 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -23,8 +23,11 @@ #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Support/CFG.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/ADT/SmallVector.h" #include <map> @@ -197,7 +200,6 @@ namespace { const Function *OldFunc; ValueToValueMapTy &VMap; bool ModuleLevelChanges; - SmallVectorImpl<ReturnInst*> &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; @@ -205,24 +207,18 @@ namespace { PruningFunctionCloner(Function *newFunc, const Function *oldFunc, ValueToValueMapTy &valueMap, bool moduleLevelChanges, - SmallVectorImpl<ReturnInst*> &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), ModuleLevelChanges(moduleLevelChanges), - Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { + NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } /// CloneBlock - The specified block is found to be reachable, clone it and /// anything that it can reach. void CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone); - - public: - /// ConstantFoldMappedInstruction - Constant fold the specified instruction, - /// mapping its operands through VMap if they are available. 
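On the ScalarReplAggregates fix above: the old code narrowed a 64-bit memset length straight to unsigned, so a length such as 0x100000001 would silently become 1. A minimal standalone illustration of the guarded narrowing, assuming a 32-bit unsigned as the pass does:

    #include <cassert>
    #include <cstdint>

    // Accept a memset length only when it is positive and its upper 32 bits
    // are zero, i.e. when truncation to unsigned is lossless.
    bool lengthFitsInUnsigned(int64_t SNumBytes, unsigned &NumBytes) {
      if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
        NumBytes = static_cast<unsigned>(SNumBytes);
        return true;
      }
      return false;
    }

    int main() {
      unsigned N;
      assert(lengthFitsInUnsigned(16, N) && N == 16);
      assert(!lengthFitsInUnsigned(INT64_C(0x100000001), N)); // would truncate to 1
    }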
- Constant *ConstantFoldMappedInstruction(const Instruction *I); }; } @@ -230,7 +226,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector<const BasicBlock*> &ToClone){ - TrackingVH<Value> &BBEntry = VMap[BB]; + WeakVH &BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -262,19 +258,33 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // loop doesn't include the terminator. for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end(); II != IE; ++II) { - // If this instruction constant folds, don't bother cloning the instruction, - // instead, just add the constant to the value map. - if (Constant *C = ConstantFoldMappedInstruction(II)) { - VMap[II] = C; - continue; + Instruction *NewInst = II->clone(); + + // Eagerly remap operands to the newly cloned instruction, except for PHI + // nodes for which we defer processing until we update the CFG. + if (!isa<PHINode>(NewInst)) { + RemapInstruction(NewInst, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges); + + // If we can simplify this instruction to some other value, simply add + // a mapping to that value rather than inserting a new instruction into + // the basic block. + if (Value *V = SimplifyInstruction(NewInst, TD)) { + // On the off-chance that this simplifies to an instruction in the old + // function, map it back into the new function. + if (Value *MappedV = VMap.lookup(V)) + V = MappedV; + + VMap[II] = V; + delete NewInst; + continue; + } } - Instruction *NewInst = II->clone(); if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); - NewBB->getInstList().push_back(NewInst); VMap[II] = NewInst; // Add instruction map to value. - + NewBB->getInstList().push_back(NewInst); hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (isa<ConstantInt>(AI->getArraySize())) @@ -340,33 +350,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); } - - if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator())) - Returns.push_back(RI); -} - -/// ConstantFoldMappedInstruction - Constant fold the specified instruction, -/// mapping its operands through VMap if they are available. -Constant *PruningFunctionCloner:: -ConstantFoldMappedInstruction(const Instruction *I) { - SmallVector<Constant*, 8> Ops; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) - if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), - VMap, - ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges))) - Ops.push_back(Op); - else - return 0; // All operands not constant! - - if (const CmpInst *CI = dyn_cast<CmpInst>(I)) - return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], - TD); - - if (const LoadInst *LI = dyn_cast<LoadInst>(I)) - if (!LI->isVolatile()) - return ConstantFoldLoadFromConstPtr(Ops[0], TD); - - return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD); } /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -393,7 +376,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, - Returns, NameSuffix, CodeInfo, TD); + NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. 
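A side benefit of swapping ConstantFoldMappedInstruction for SimplifyInstruction in the cloner above: folding no longer requires every remapped operand to be a constant. In scalar terms, these are the kinds of identities that now fold during cloning (an illustrative, non-exhaustive list):

    // Pure constant folding needs every operand constant:  3 + 4  ==>  7.
    // Instruction simplification also folds for arbitrary x:
    int addZero(int x) { return x + 0; }  // ==> x
    int andSelf(int x) { return x & x; }  // ==> x
    int subSelf(int x) { return x - x; }  // ==> 0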
  std::vector<const BasicBlock*> CloneWorklist;
@@ -418,25 +401,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
-
-    // Loop over all of the instructions in the block, fixing up operand
-    // references as we go.  This uses VMap to do all the hard work.
-    //
-    BasicBlock::iterator I = NewBB->begin();
 
     // Handle PHI nodes specially, as we have to remove references to dead
     // blocks.
-    if (PHINode *PN = dyn_cast<PHINode>(I)) {
-      // Skip over all PHI nodes, remembering them for later.
-      BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
-        PHIToResolve.push_back(cast<PHINode>(OldI));
-    }
-
-    // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I)
-      RemapInstruction(I, VMap,
-                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+      if (const PHINode *PN = dyn_cast<PHINode>(I))
+        PHIToResolve.push_back(PN);
+      else
+        break;
+
+    // Finally, remap the terminator instructions, as those can't be remapped
+    // until all BBs are mapped.
+    RemapInstruction(NewBB->getTerminator(), VMap,
+                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
   }
 
   // Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -518,31 +495,55 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
           ++OldI;
         }
       }
-    // NOTE: We cannot eliminate single entry phi nodes here, because of
-    // VMap.  Single entry phi nodes can have multiple VMap entries
-    // pointing at them.  Thus, deleting one would require scanning the VMap
-    // to update any entries in it that would require that.  This would be
-    // really slow.
   }
-
+
+  // Make a second pass over the PHINodes now that all of them have been
+  // remapped into the new function, simplifying the PHINode and performing any
+  // recursive simplifications exposed. This will transparently update the
+  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+  // two PHINodes, the iteration over the old PHIs remains valid, and the
+  // mapping will just map us to the new node (which may not even be a PHI
+  // node).
+  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+      recursivelySimplifyInstruction(PN, TD);
+
  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
-  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+  Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+  Function::iterator I = Begin;
   while (I != NewFunc->end()) {
+    // Check if this block has become dead during inlining or other
+    // simplifications. Note that the first block will appear dead, as it has
+    // not yet been wired up properly.
+    if (I != Begin && (pred_begin(I) == pred_end(I) ||
+                       I->getSinglePredecessor() == I)) {
+      BasicBlock *DeadBB = I++;
+      DeleteDeadBlock(DeadBB);
+      continue;
+    }
+
+    // We need to simplify conditional branches and switches with a constant
+    // operand.
+    // We try to prune these out when cloning, but if the simplification
+    // required looking through PHI nodes, those are only available after
+    // forming the full basic block. That may leave some here, and we still
+    // want to prune the dead code as early as possible.
+    ConstantFoldTerminator(I);
+
     BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
     if (!BI || BI->isConditional()) { ++I; continue; }
 
-    // Note that we can't eliminate uncond branches if the destination has
-    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
-    // require scanning the VMap to update any entries that point to the phi
-    // node.
     BasicBlock *Dest = BI->getSuccessor(0);
-    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+    if (!Dest->getSinglePredecessor()) {
       ++I; continue;
     }
-
+
+    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+    // above should have zapped all of them.
+    assert(!isa<PHINode>(Dest->begin()));
+
     // We know all single-entry PHI nodes in the inlined function have been
     // removed, so we just need to splice the blocks.
     BI->eraseFromParent();
@@ -558,4 +559,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
 
     // Do not increment I, iteratively merge all things this block branches to.
   }
+
+  // Make a final pass over the basic blocks from the old function to gather
+  // any return instructions which survived folding. We have to do this here
+  // because we can iteratively remove and merge returns above.
+  for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+                          E = NewFunc->end();
+       I != E; ++I)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+      Returns.push_back(RI);
 }
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index b84de05..d2b167a 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -31,10 +31,12 @@
 #include "llvm/Support/IRBuilder.h"
 using namespace llvm;
 
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, bool InsertLifetime) {
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+                          bool InsertLifetime) {
   return InlineFunction(CallSite(CI), IFI, InsertLifetime);
 }
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, bool InsertLifetime) {
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+                          bool InsertLifetime) {
   return InlineFunction(CallSite(II), IFI, InsertLifetime);
 }
 
@@ -434,8 +436,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
   return false;
 }
 
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
-/// update InlinedAtEntry of a DebugLoc.
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
+/// recursively update InlinedAtEntry of a DebugLoc.
 static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
                                     const DebugLoc &InlinedAtDL,
                                     LLVMContext &Ctx) {
@@ -445,7 +447,7 @@ static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
     return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                          NewInlinedAtDL.getAsMDNode(Ctx));
   }
-
+
   return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
                        InlinedAtDL.getAsMDNode(Ctx));
 }
 
@@ -453,7 +455,7 @@ static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
 /// fixupLineNumbers - Update inlined instructions' line numbers to
 /// encode the location where these instructions are inlined.
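Returning to the dead-block check added to CloneAndPruneFunctionInto above, it reduces to a small predicate. A sketch using the same CFG helpers the patch itself includes (Begin is the cloned entry block, which merely looks dead because its predecessors are wired up later):

    #include "llvm/BasicBlock.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    // A cloned block is prunable if nothing branches to it, or if its only
    // predecessor is itself (an unreachable self-loop). Prunable blocks are
    // then handed to DeleteDeadBlock from BasicBlockUtils.h.
    static bool isPrunableClonedBlock(BasicBlock *BB, BasicBlock *Begin) {
      if (BB == Begin)
        return false; // entry block; not yet wired up
      return pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB;
    }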
static void fixupLineNumbers(Function *Fn, Function::iterator FI, - Instruction *TheCall) { + Instruction *TheCall) { DebugLoc TheCallDL = TheCall->getDebugLoc(); if (TheCallDL.isUnknown()) return; @@ -484,7 +486,8 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. -bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { +bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, + bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 5f895eb..d1c4d59 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -355,22 +355,27 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// instructions in other blocks as well in this block. bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { bool MadeChange = false; - for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { + +#ifndef NDEBUG + // In debug builds, ensure that the terminator of the block is never replaced + // or deleted by these simplifications. The idea of simplification is that it + // cannot introduce new instructions, and there is no way to replace the + // terminator of a block without introducing a new instruction. + AssertingVH<Instruction> TerminatorVH(--BB->end()); +#endif + + for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + assert(!BI->isTerminator()); Instruction *Inst = BI++; - - if (Value *V = SimplifyInstruction(Inst, TD)) { - WeakVH BIHandle(BI); - ReplaceAndSimplifyAllUses(Inst, V, TD); + + WeakVH BIHandle(BI); + if (recursivelySimplifyInstruction(Inst, TD)) { MadeChange = true; if (BIHandle != BI) BI = BB->begin(); continue; } - if (Inst->isTerminator()) - break; - - WeakVH BIHandle(BI); MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); if (BIHandle != BI) BI = BB->begin(); @@ -408,17 +413,11 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, WeakVH PhiIt = &BB->front(); while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) { PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt)); + Value *OldPhiIt = PhiIt; - Value *PNV = SimplifyInstruction(PN, TD); - if (PNV == 0) continue; + if (!recursivelySimplifyInstruction(PN, TD)) + continue; - // If we're able to simplify the phi to a single value, substitute the new - // value into all of its uses. - assert(PNV != PN && "SimplifyInstruction broken!"); - - Value *OldPhiIt = PhiIt; - ReplaceAndSimplifyAllUses(PN, PNV, TD); - // If recursive simplification ended up deleting the next PHI node we would // iterate to, then our iterator is invalid, restart scanning from the top // of the block. @@ -763,9 +762,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); unsigned BitWidth = TD ? 
                                          TD->getPointerSizeInBits() : 64;
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  ComputeMaskedBits(V, KnownZero, KnownOne, TD);
   unsigned TrailZ = KnownZero.countTrailingOnes();
 
   // Avoid trouble with ridiculously large TrailZ values, such as
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 512b689..e15497a 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -149,6 +149,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
     return false;
   }
 
+  // Loops with indirectbr cannot be cloned.
+  if (!L->isSafeToClone()) {
+    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");
+    return false;
+  }
+
   BasicBlock *Header = L->getHeader();
   BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index d53a46e..66dd2c9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2562,7 +2562,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
   Value *Cond = SI->getCondition();
   unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
   APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
-  ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+  ComputeMaskedBits(Cond, KnownZero, KnownOne);
 
   // Gather dead cases.
   SmallVector<ConstantInt*, 8> DeadCases;
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index e00565d..4030bef 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -46,7 +46,6 @@ namespace {
     LoopInfo        *LI;
     DominatorTree   *DT;
     ScalarEvolution *SE;
-    IVUsers         *IU; // NULL for DisableIVRewrite
     const TargetData *TD; // May be NULL
 
     SmallVectorImpl<WeakVH> &DeadInsts;
@@ -59,7 +58,6 @@ namespace {
       L(Loop),
       LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
       SE(SE),
-      IU(IVU),
       TD(LPM->getAnalysisIfAvailable<TargetData>()),
       DeadInsts(Dead),
       Changed(false) {
@@ -229,13 +227,6 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
     Rem->replaceAllUsesWith(Sel);
   }
 
-  // Inform IVUsers about the new users.
-  if (IU) {
-    if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) {
-      SmallPtrSet<Loop*, 16> SimplifiedLoopNests;
-      IU->AddUsersIfInteresting(I, SimplifiedLoopNests);
-    }
-  }
   DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
   ++NumElimRem;
   Changed = true;
@@ -401,36 +392,4 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
   return Changed;
 }
 
-/// simplifyIVUsers - Perform simplification on instructions recorded by the
-/// IVUsers pass.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyLoopIVs.
-bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
-                     SmallVectorImpl<WeakVH> &Dead) {
-  SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
-
-  // Each round of simplification involves a round of eliminating operations
-  // followed by a round of widening IVs. A single IVUsers worklist is used
-  // across all rounds. The inner loop advances the user. If widening exposes
-  // more uses, then another pass through the outer loop is triggered.
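On the getOrEnforceKnownAlignment change above (ComputeMaskedBits losing its explicit mask argument): the alignment itself still comes from counting known-zero low bits. A standalone model of that derivation; the clamp constant here is illustrative, chosen only to keep the shift defined:

    #include <cstdint>

    // KnownZero has a 1 for every bit proven zero. If the low TrailZ bits of
    // a pointer are known zero, the pointer is 2^TrailZ-byte aligned.
    unsigned alignmentFromKnownZeroBits(uint64_t KnownZero) {
      unsigned TrailZ = 0;
      while (TrailZ < 31 && (KnownZero & (UINT64_C(1) << TrailZ)))
        ++TrailZ;              // count trailing known-zero bits, clamped
      return 1u << TrailZ;     // e.g. low 4 bits known zero => 16-byte aligned
    }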
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) { - Instruction *UseInst = I->getUser(); - Value *IVOperand = I->getOperandValToReplace(); - - if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) { - SIV.eliminateIVComparison(ICmp, IVOperand); - continue; - } - if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) { - bool IsSigned = Rem->getOpcode() == Instruction::SRem; - if (IsSigned || Rem->getOpcode() == Instruction::URem) { - SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned); - continue; - } - } - } - return SIV.hasChanged(); -} - } // namespace llvm diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 32eec79..9d62306 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -84,6 +84,10 @@ NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point values")); static cl::opt<bool> +NoPointers("bb-vectorize-no-pointers", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize pointer values")); + +static cl::opt<bool> NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize casting (conversion) operations")); @@ -96,6 +100,14 @@ NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); static cl::opt<bool> +NoSelect("bb-vectorize-no-select", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize select instructions")); + +static cl::opt<bool> +NoGEP("bb-vectorize-no-gep", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize getelementptr instructions")); + +static cl::opt<bool> NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize loads and stores")); @@ -140,10 +152,21 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize"); namespace { struct BBVectorize : public BasicBlockPass { static char ID; // Pass identification, replacement for typeid - BBVectorize() : BasicBlockPass(ID) { + + const VectorizeConfig Config; + + BBVectorize(const VectorizeConfig &C = VectorizeConfig()) + : BasicBlockPass(ID), Config(C) { initializeBBVectorizePass(*PassRegistry::getPassRegistry()); } + BBVectorize(Pass *P, const VectorizeConfig &C) + : BasicBlockPass(ID), Config(C) { + AA = &P->getAnalysis<AliasAnalysis>(); + SE = &P->getAnalysis<ScalarEvolution>(); + TD = P->getAnalysisIfAvailable<TargetData>(); + } + typedef std::pair<Value *, Value *> ValuePair; typedef std::pair<ValuePair, size_t> ValuePairWithDepth; typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair @@ -280,18 +303,15 @@ namespace { Instruction *&InsertionPt, Instruction *I, Instruction *J); - virtual bool runOnBasicBlock(BasicBlock &BB) { - AA = &getAnalysis<AliasAnalysis>(); - SE = &getAnalysis<ScalarEvolution>(); - TD = getAnalysisIfAvailable<TargetData>(); - + bool vectorizeBB(BasicBlock &BB) { bool changed = false; // Iterate a sufficient number of times to merge types of size 1 bit, // then 2 bits, then 4, etc. up to half of the target vector width of the // target vector register. 
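The new BBVectorize(Pass *, const VectorizeConfig &) constructor above exists so another pass can drive the vectorizer in-process, together with the vectorizeBasicBlock and createBBVectorizePass entry points added at the end of this file. A hypothetical caller, assuming the usual Vectorize.h header; the config fields are the ones this patch introduces:

    #include "llvm/Transforms/Vectorize.h"  // assumed home of VectorizeConfig
    using namespace llvm;

    // Call from inside a pass P that has already required AliasAnalysis and
    // ScalarEvolution, as the new constructor expects.
    bool vectorizeWithCustomConfig(Pass *P, BasicBlock &BB) {
      VectorizeConfig C;        // fields default to the command-line flags
      C.VectorBits = 256;       // pretend the target has 256-bit registers
      C.VectorizeGEP = false;   // keep address computations scalar
      return vectorizeBasicBlock(P, BB, C);
    }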
-    for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
+    for (unsigned v = 2, n = 1;
+         v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
          v *= 2, ++n) {
-      DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+      DEBUG(dbgs() << "BBV: fusing loop #" << n <<
             " for " << BB.getName() << " in " <<
             BB.getParent()->getName() << "...\n");
       if (vectorizePairs(BB))
@@ -304,6 +324,14 @@ namespace {
       return changed;
     }
 
+    virtual bool runOnBasicBlock(BasicBlock &BB) {
+      AA = &getAnalysis<AliasAnalysis>();
+      SE = &getAnalysis<ScalarEvolution>();
+      TD = getAnalysisIfAvailable<TargetData>();
+
+      return vectorizeBB(BB);
+    }
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       BasicBlockPass::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
@@ -333,7 +361,7 @@ namespace {
     // candidate chains where longer chains are considered to be better.
     // Note: when this function returns 0, the resulting instructions are
     // not actually fused.
-    static inline size_t getDepthFactor(Value *V) {
+    inline size_t getDepthFactor(Value *V) {
       // InsertElement and ExtractElement have a depth factor of zero. This is
       // for two reasons: First, they cannot be usefully fused. Second, because
       // the pass generates a lot of these, they can confuse the simple metric
@@ -347,8 +375,8 @@ namespace {
 
       // Give a load or store half of the required depth so that load/store
      // pairs will vectorize.
-      if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
-        return ReqChainDepth/2;
+      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+        return Config.ReqChainDepth/2;
 
       return 1;
     }
@@ -421,9 +449,9 @@ namespace {
         case Intrinsic::exp:
         case Intrinsic::exp2:
         case Intrinsic::pow:
-          return !NoMath;
+          return Config.VectorizeMath;
         case Intrinsic::fma:
-          return !NoFMA;
+          return Config.VectorizeFMA;
         }
       }
 
@@ -517,24 +545,34 @@ namespace {
       } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
         // Vectorize simple loads if possible:
         IsSimpleLoadStore = L->isSimple();
-        if (!IsSimpleLoadStore || NoMemOps)
+        if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
           return false;
       } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
         // Vectorize simple stores if possible:
         IsSimpleLoadStore = S->isSimple();
-        if (!IsSimpleLoadStore || NoMemOps)
+        if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
           return false;
       } else if (CastInst *C = dyn_cast<CastInst>(I)) {
         // We can vectorize casts, but not casts of pointer types, etc.
-        if (NoCasts)
+        if (!Config.VectorizeCasts)
           return false;
         Type *SrcTy = C->getSrcTy();
-        if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
+        if (!SrcTy->isSingleValueType())
           return false;
 
         Type *DestTy = C->getDestTy();
-        if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
+        if (!DestTy->isSingleValueType())
+          return false;
+      } else if (isa<SelectInst>(I)) {
+        if (!Config.VectorizeSelect)
+          return false;
+      } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(I)) {
+        if (!Config.VectorizeGEP)
+          return false;
+
+        // Currently, vector GEPs exist only with one index.
+ if (G->getNumIndices() != 1) return false; } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) || isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) { @@ -566,14 +604,21 @@ namespace { !(VectorType::isValidElementType(T2) || T2->isVectorTy())) return false; - if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + if (!Config.VectorizeInts + && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy())) + return false; + + if (!Config.VectorizeFloats + && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) return false; - if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy())) + if ((!Config.VectorizePointers || TD == 0) && + (T1->getScalarType()->isPointerTy() || + T2->getScalarType()->isPointerTy())) return false; - if (T1->getPrimitiveSizeInBits() > VectorBits/2 || - T2->getPrimitiveSizeInBits() > VectorBits/2) + if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 || + T2->getPrimitiveSizeInBits() > Config.VectorBits/2) return false; return true; @@ -601,7 +646,7 @@ namespace { LI->isVolatile() != LJ->isVolatile() || LI->getOrdering() != LJ->getOrdering() || LI->getSynchScope() != LJ->getSynchScope()) - return false; + return false; } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) { if (SI->getValueOperand()->getType() != SJ->getValueOperand()->getType() || @@ -622,7 +667,7 @@ namespace { int64_t OffsetInElmts = 0; if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment, OffsetInElmts) && abs64(OffsetInElmts) == 1) { - if (AlignedOnly) { + if (Config.AlignedOnly) { Type *aType = isa<StoreInst>(I) ? cast<StoreInst>(I)->getValueOperand()->getType() : I->getType(); // An aligned load or store is possible only if the instruction @@ -647,6 +692,20 @@ namespace { // FIXME: We may want to vectorize non-constant shuffles also. } + // The powi intrinsic is special because only the first argument is + // vectorized, the second arguments must be equal. + CallInst *CI = dyn_cast<CallInst>(I); + Function *FI; + if (CI && (FI = CI->getCalledFunction()) && + FI->getIntrinsicID() == Intrinsic::powi) { + + Value *A1I = CI->getArgOperand(1), + *A1J = cast<CallInst>(J)->getArgOperand(1); + const SCEV *A1ISCEV = SE->getSCEV(A1I), + *A1JSCEV = SE->getSCEV(A1J); + return (A1ISCEV == A1JSCEV); + } + return true; } @@ -729,12 +788,12 @@ namespace { AliasSetTracker WriteSet(*AA); bool JAfterStart = IAfterStart; BasicBlock::iterator J = llvm::next(I); - for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) { + for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { if (J == Start) JAfterStart = true; // Determine if J uses I, if so, exit the loop. - bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep); - if (FastDep) { + bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); + if (Config.FastDep) { // Note: For this heuristic to be effective, independent operations // must tend to be intermixed. This is likely to be true from some // kinds of grouped loop unrolling (but not the generic LLVM pass), @@ -772,7 +831,7 @@ namespace { // If we have already found too many pairs, break here and this function // will be called again starting after the last instruction selected // during this invocation. 
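On the powi special case above: only the first operand is vectorized, while the exponent stays scalar and is shared between both fused calls, so fusion is only sound when the two exponents are provably equal, which is what the SCEV comparison establishes. A scalar model of the constraint:

    #include <cmath>

    // A fused two-lane powi evaluates both lanes with one shared exponent N,
    // so fusing powi(a, n) with powi(b, m) is only correct when n == m.
    void fusedPowi2(const double A[2], int N, double Out[2]) {
      Out[0] = std::pow(A[0], N);
      Out[1] = std::pow(A[1], N);
    }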
- if (PairableInsts.size() >= MaxInsts) { + if (PairableInsts.size() >= Config.MaxInsts) { ShouldContinue = true; break; } @@ -796,16 +855,33 @@ namespace { std::vector<Value *> &PairableInsts, std::multimap<ValuePair, ValuePair> &ConnectedPairs, ValuePair P) { + StoreInst *SI, *SJ; + // For each possible pairing for this variable, look at the uses of // the first value... for (Value::use_iterator I = P.first->use_begin(), E = P.first->use_end(); I != E; ++I) { + if (isa<LoadInst>(*I)) { + // A pair cannot be connected to a load because the load only takes one + // operand (the address) and it is a scalar even after vectorization. + continue; + } else if ((SI = dyn_cast<StoreInst>(*I)) && + P.first == SI->getPointerOperand()) { + // Similarly, a pair cannot be connected to a store through its + // pointer operand. + continue; + } + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); // For each use of the first variable, look for uses of the second // variable... for (Value::use_iterator J = P.second->use_begin(), E2 = P.second->use_end(); J != E2; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.second == SJ->getPointerOperand()) + continue; + VPIteratorPair JPairRange = CandidatePairs.equal_range(*J); // Look for <I, J>: @@ -817,23 +893,37 @@ namespace { ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I))); } - if (SplatBreaksChain) continue; + if (Config.SplatBreaksChain) continue; // Look for cases where just the first value in the pair is used by // both members of another pair (splatting). for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.first == SJ->getPointerOperand()) + continue; + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); } } - if (SplatBreaksChain) return; + if (Config.SplatBreaksChain) return; // Look for cases where just the second value in the pair is used by // both members of another pair (splatting). 
for (Value::use_iterator I = P.second->use_begin(), E = P.second->use_end(); I != E; ++I) { + if (isa<LoadInst>(*I)) + continue; + else if ((SI = dyn_cast<StoreInst>(*I)) && + P.second == SI->getPointerOperand()) + continue; + VPIteratorPair IPairRange = CandidatePairs.equal_range(*I); for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) { + if ((SJ = dyn_cast<StoreInst>(*J)) && + P.second == SJ->getPointerOperand()) + continue; + if (isSecondInIteratorPair<Value*>(*J, IPairRange)) ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J))); } @@ -1256,7 +1346,7 @@ namespace { << *J->first << " <-> " << *J->second << "} of depth " << MaxDepth << " and size " << PrunedTree.size() << " (effective size: " << EffSize << ")\n"); - if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) { + if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) { BestMaxDepth = MaxDepth; BestEffSize = EffSize; BestTree = PrunedTree; @@ -1272,7 +1362,8 @@ namespace { std::multimap<ValuePair, ValuePair> &ConnectedPairs, DenseSet<ValuePair> &PairableInstUsers, DenseMap<Value *, Value *>& ChosenPairs) { - bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck; + bool UseCycleCheck = + CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck; std::multimap<ValuePair, ValuePair> PairableInstUserMap; for (std::vector<Value *>::iterator I = PairableInsts.begin(), E = PairableInsts.end(); I != E; ++I) { @@ -1518,19 +1609,27 @@ namespace { ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o, FlipMemInputs); continue; - } else if (isa<CallInst>(I) && o == NumOperands-1) { + } else if (isa<CallInst>(I)) { Function *F = cast<CallInst>(I)->getCalledFunction(); unsigned IID = F->getIntrinsicID(); - BasicBlock &BB = *I->getParent(); + if (o == NumOperands-1) { + BasicBlock &BB = *I->getParent(); - Module *M = BB.getParent()->getParent(); - Type *ArgType = I->getType(); - Type *VArgType = getVecTypeForPair(ArgType); + Module *M = BB.getParent()->getParent(); + Type *ArgType = I->getType(); + Type *VArgType = getVecTypeForPair(ArgType); - // FIXME: is it safe to do this here? - ReplacedOperands[o] = Intrinsic::getDeclaration(M, - (Intrinsic::ID) IID, VArgType); - continue; + // FIXME: is it safe to do this here? + ReplacedOperands[o] = Intrinsic::getDeclaration(M, + (Intrinsic::ID) IID, VArgType); + continue; + } else if (IID == Intrinsic::powi && o == 1) { + // The second argument of powi is a single integer and we've already + // checked that both arguments are equal. As a result, we just keep + // I's second argument. 
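Back on the load/store filtering added to the pair-connection search above, the rule being enforced is that a use can connect two pairs only if that use could itself become a vector operand. A compact restatement (illustrative, not the pass's exact code):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // The address operand of a load or store stays scalar after
    // vectorization, so a use of V as a pointer operand never connects
    // pairs; a use as a store's value operand still can.
    static bool useCanConnectPairs(Instruction *User, Value *V) {
      if (isa<LoadInst>(User))
        return false;
      if (StoreInst *SI = dyn_cast<StoreInst>(User))
        return V != SI->getPointerOperand();
      return true;
    }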
+ ReplacedOperands[o] = I->getOperand(o); + continue; + } } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) { ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J); continue; @@ -1835,7 +1934,35 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) -BasicBlockPass *llvm::createBBVectorizePass() { - return new BBVectorize(); +BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { + return new BBVectorize(C); +} + +bool +llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) { + BBVectorize BBVectorizer(P, C); + return BBVectorizer.vectorizeBB(BB); } +//===----------------------------------------------------------------------===// +VectorizeConfig::VectorizeConfig() { + VectorBits = ::VectorBits; + VectorizeInts = !::NoInts; + VectorizeFloats = !::NoFloats; + VectorizePointers = !::NoPointers; + VectorizeCasts = !::NoCasts; + VectorizeMath = !::NoMath; + VectorizeFMA = !::NoFMA; + VectorizeSelect = !::NoSelect; + VectorizeGEP = !::NoGEP; + VectorizeMemOps = !::NoMemOps; + AlignedOnly = ::AlignedOnly; + ReqChainDepth= ::ReqChainDepth; + SearchLimit = ::SearchLimit; + MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck; + SplatBreaksChain = ::SplatBreaksChain; + MaxInsts = ::MaxInsts; + MaxIter = ::MaxIter; + NoMemOpBoost = ::NoMemOpBoost; + FastDep = ::FastDep; +} diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index ea3d4ba..2e16372 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -18,9 +18,6 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/IntrinsicInst.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ErrorHandling.h" @@ -59,7 +56,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name.startswith("x86.sse2.pcmpeq.") || Name.startswith("x86.sse2.pcmpgt.") || Name.startswith("x86.avx2.pcmpeq.") || - Name.startswith("x86.avx2.pcmpgt.")) { + Name.startswith("x86.avx2.pcmpgt.") || + Name.startswith("x86.avx.vpermil.")) { NewFn = 0; return true; } @@ -121,7 +119,42 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // need to sign extend since icmp returns vector of i1 Rep = Builder.CreateSExt(Rep, CI->getType(), ""); } else { - llvm_unreachable("Unknown function for CallInst upgrade."); + bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; + if (Name.startswith("llvm.x86.avx.vpermil.pd.256")) + PD256 = true; + else if (Name.startswith("llvm.x86.avx.vpermil.pd")) + PD128 = true; + else if (Name.startswith("llvm.x86.avx.vpermil.ps.256")) + PS256 = true; + else if (Name.startswith("llvm.x86.avx.vpermil.ps")) + PS128 = true; + + if (PD256 || PD128 || PS256 || PS128) { + Value *Op0 = CI->getArgOperand(0); + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + SmallVector<Constant*, 8> Idxs; + + if (PD128) + for (unsigned i = 0; i != 2; ++i) + Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1)); + else if (PD256) + for (unsigned l = 0; l != 4; l+=2) + for (unsigned i = 0; i != 2; ++i) + Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l)); + else if (PS128) + for (unsigned i = 0; i != 4; ++i) + Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3)); + else if (PS256) + for (unsigned l = 0; l != 8; l+=4) + for (unsigned i = 0; i != 4; 
++i) + Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l)); + else + llvm_unreachable("Unexpected function"); + + Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs)); + } else { + llvm_unreachable("Unknown function for CallInst upgrade."); + } } CI->replaceAllUsesWith(Rep); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 99eeba1..e1efcda 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -8,7 +8,6 @@ add_llvm_library(LLVMCore ConstantFold.cpp Constants.cpp Core.cpp - DebugInfoProbe.cpp DebugLoc.cpp Dominators.cpp Function.cpp diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index e86d805..a9cca22 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -2066,6 +2066,20 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name)); } +LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { + Value *P = unwrap<Value>(MemAccessInst); + if (LoadInst *LI = dyn_cast<LoadInst>(P)) + return LI->isVolatile(); + return cast<StoreInst>(P)->isVolatile(); +} + +void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { + Value *P = unwrap<Value>(MemAccessInst); + if (LoadInst *LI = dyn_cast<LoadInst>(P)) + return LI->setVolatile(isVolatile); + return cast<StoreInst>(P)->setVolatile(isVolatile); +} + /*--.. Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp deleted file mode 100644 index d1275ff..0000000 --- a/lib/VMCore/DebugInfoProbe.cpp +++ /dev/null @@ -1,225 +0,0 @@ -//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements DebugInfoProbe. This probe can be used by a pass -// manager to analyze how optimizer is treating debugging information. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "debuginfoprobe" -#include "llvm/DebugInfoProbe.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" -#include "llvm/PassManager.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/DebugLoc.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringRef.h" -#include <set> -#include <string> - -using namespace llvm; - -static cl::opt<bool> -EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden, - cl::desc("Enable debug info probe")); - -// CreateInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } - -//===----------------------------------------------------------------------===// -// DebugInfoProbeImpl - This class implements a interface to monitor -// how an optimization pass is preserving debugging information. 
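The two C API entry points added to Core.cpp above expose the volatile bit on loads and stores to language bindings. A hypothetical client snippet (valid C++; LLVMBuildLoad is the existing C API builder call):

    #include "llvm-c/Core.h"

    // Emit a load through the C API and mark it volatile if it is not already.
    void buildVolatileLoad(LLVMBuilderRef B, LLVMValueRef Ptr) {
      LLVMValueRef L = LLVMBuildLoad(B, Ptr, "val");
      if (!LLVMGetVolatile(L))
        LLVMSetVolatile(L, 1);  // LLVMBool: nonzero means volatile
    }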
- -namespace llvm { - - class DebugInfoProbeImpl { - public: - DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {} - void initialize(StringRef PName, Function &F); - void finalize(Function &F); - void report(); - private: - unsigned NumDbgLineLost, NumDbgValueLost; - std::string PassName; - Function *TheFn; - std::set<MDNode *> DbgVariables; - std::set<Instruction *> MissingDebugLoc; - }; -} - -//===----------------------------------------------------------------------===// -// DebugInfoProbeImpl - -/// initialize - Collect information before running an optimization pass. -void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) { - if (!EnableDebugInfoProbe) return; - PassName = PName; - - DbgVariables.clear(); - MissingDebugLoc.clear(); - TheFn = &F; - - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown()) - MissingDebugLoc.insert(BI); - if (!isa<DbgInfoIntrinsic>(BI)) continue; - Value *Addr = NULL; - MDNode *Node = NULL; - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - Addr = DDI->getAddress(); - Node = DDI->getVariable(); - } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - Addr = DVI->getValue(); - Node = DVI->getVariable(); - } - if (Addr) - DbgVariables.insert(Node); - } -} - -/// report - Report findings. This should be invoked after finalize. -void DebugInfoProbeImpl::report() { - if (!EnableDebugInfoProbe) return; - if (NumDbgLineLost || NumDbgValueLost) { - raw_ostream *OutStream = CreateInfoOutputFile(); - if (NumDbgLineLost) - *OutStream << NumDbgLineLost - << "\t times line number info lost by " - << PassName << "\n"; - if (NumDbgValueLost) - *OutStream << NumDbgValueLost - << "\t times variable info lost by " - << PassName << "\n"; - delete OutStream; - } - NumDbgLineLost = 0; - NumDbgValueLost = 0; -} - -/// finalize - Collect information after running an optimization pass. This -/// must be used after initialization. 
-void DebugInfoProbeImpl::finalize(Function &F) { - if (!EnableDebugInfoProbe) return; - assert (TheFn == &F && "Invalid function to measure!"); - - std::set<MDNode *>DbgVariables2; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown() && - MissingDebugLoc.count(BI) == 0) { - ++NumDbgLineLost; - DEBUG(dbgs() << "DebugInfoProbe (" << PassName << "): --- "); - DEBUG(BI->print(dbgs())); - DEBUG(dbgs() << "\n"); - } - if (!isa<DbgInfoIntrinsic>(BI)) continue; - Value *Addr = NULL; - MDNode *Node = NULL; - if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) { - Addr = DDI->getAddress(); - Node = DDI->getVariable(); - } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { - Addr = DVI->getValue(); - Node = DVI->getVariable(); - } - if (Addr) - DbgVariables2.insert(Node); - } - - for (std::set<MDNode *>::iterator I = DbgVariables.begin(), - E = DbgVariables.end(); I != E; ++I) { - if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) { - DEBUG(dbgs() - << "DebugInfoProbe(" - << PassName - << "): Losing dbg info for variable: "; - if (MDString *MDS = dyn_cast_or_null<MDString>( - (*I)->getOperand(2))) - dbgs() << MDS->getString(); - else - dbgs() << "..."; - dbgs() << "\n"); - ++NumDbgValueLost; - } - } -} - -//===----------------------------------------------------------------------===// -// DebugInfoProbe - -DebugInfoProbe::DebugInfoProbe() { - pImpl = new DebugInfoProbeImpl(); -} - -DebugInfoProbe::~DebugInfoProbe() { - delete pImpl; -} - -/// initialize - Collect information before running an optimization pass. -void DebugInfoProbe::initialize(StringRef PName, Function &F) { - pImpl->initialize(PName, F); -} - -/// finalize - Collect information after running an optimization pass. This -/// must be used after initialization. -void DebugInfoProbe::finalize(Function &F) { - pImpl->finalize(F); -} - -/// report - Report findings. This should be invoked after finalize. -void DebugInfoProbe::report() { - pImpl->report(); -} - -//===----------------------------------------------------------------------===// -// DebugInfoProbeInfo - -/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting -/// them. -DebugInfoProbeInfo::~DebugInfoProbeInfo() { - if (!EnableDebugInfoProbe) return; - for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(), - E = Probes.end(); I != E; ++I) { - I->second->report(); - delete I->second; - } - } - -/// initialize - Collect information before running an optimization pass. -void DebugInfoProbeInfo::initialize(Pass *P, Function &F) { - if (!EnableDebugInfoProbe) return; - if (P->getAsPMDataManager()) - return; - - StringMapEntry<DebugInfoProbe *> &Entry = - Probes.GetOrCreateValue(P->getPassName()); - DebugInfoProbe *&Probe = Entry.getValue(); - if (!Probe) - Probe = new DebugInfoProbe(); - Probe->initialize(P->getPassName(), F); -} - -/// finalize - Collect information after running an optimization pass. This -/// must be used after initialization. 
-void DebugInfoProbeInfo::finalize(Pass *P, Function &F) { - if (!EnableDebugInfoProbe) return; - if (P->getAsPMDataManager()) - return; - StringMapEntry<DebugInfoProbe *> &Entry = - Probes.GetOrCreateValue(P->getPassName()); - DebugInfoProbe *&Probe = Entry.getValue(); - assert (Probe && "DebugInfoProbe is not initialized!"); - Probe->finalize(F); -} diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index 328244f..9013d28 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -173,10 +173,7 @@ DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() { } unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) { - FoldingSetNodeID ID; - ID.AddInteger(Key.LineCol); - ID.AddInteger(Key.ScopeIdx); - return ID.ComputeHash(); + return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx)); } bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) { diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index af51a05..219e631 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -88,8 +88,13 @@ bool DominatorTree::dominates(const Instruction *Def, const BasicBlock *UseBB = User->getParent(); const BasicBlock *DefBB = Def->getParent(); - assert(isReachableFromEntry(DefBB) && isReachableFromEntry(UseBB) && - "We only handle reachable blocks"); + // Any unreachable use is dominated, even if Def == User. + if (!isReachableFromEntry(UseBB)) + return true; + + // Unreachable definitions don't dominate anything. + if (!isReachableFromEntry(DefBB)) + return false; // An instruction doesn't dominate a use in itself. if (Def == User) @@ -119,8 +124,13 @@ bool DominatorTree::dominates(const Instruction *Def, const BasicBlock *UseBB) const { const BasicBlock *DefBB = Def->getParent(); - assert(isReachableFromEntry(DefBB) && isReachableFromEntry(UseBB) && - "We only handle reachable blocks"); + // Any unreachable use is dominated, even if DefBB == UseBB. + if (!isReachableFromEntry(UseBB)) + return true; + + // Unreachable definitions don't dominate anything. + if (!isReachableFromEntry(DefBB)) + return false; if (DefBB == UseBB) return false; @@ -174,3 +184,83 @@ bool DominatorTree::dominates(const Instruction *Def, } return true; } + +bool DominatorTree::dominates(const Instruction *Def, + const Use &U) const { + Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); + + // Instructions do not dominate non-instructions. + if (!UserInst) + return false; + + const BasicBlock *DefBB = Def->getParent(); + + // Determine the block in which the use happens. PHI nodes use + // their operands on edges; simulate this by thinking of the use + // happening at the end of the predecessor block. + const BasicBlock *UseBB; + if (PHINode *PN = dyn_cast<PHINode>(UserInst)) + UseBB = PN->getIncomingBlock(U); + else + UseBB = UserInst->getParent(); + + // Any unreachable use is dominated, even if Def == User. + if (!isReachableFromEntry(UseBB)) + return true; + + // Unreachable definitions don't dominate anything. + if (!isReachableFromEntry(DefBB)) + return false; + + // Invoke instructions define their return values on the edges + // to their normal successors, so we have to handle them specially. + // Among other things, this means they don't dominate anything in + // their own block, except possibly a phi, so we don't need to + // walk the block in any case. 
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) { + // A PHI in the normal successor using the invoke's return value is + // dominated by the invoke's return value. + if (isa<PHINode>(UserInst) && + UserInst->getParent() == II->getNormalDest() && + cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB) + return true; + + // Otherwise use the instruction-dominates-block query, which + // handles the crazy case of an invoke with a critical edge + // properly. + return dominates(Def, UseBB); + } + + // If the def and use are in different blocks, do a simple CFG dominator + // tree query. + if (DefBB != UseBB) + return dominates(DefBB, UseBB); + + // Ok, def and use are in the same block. If the def is an invoke, it + // doesn't dominate anything in the block. If it's a PHI, it dominates + // everything in the block. + if (isa<PHINode>(UserInst)) + return true; + + // Otherwise, just loop through the basic block until we find Def or User. + BasicBlock::const_iterator I = DefBB->begin(); + for (; &*I != Def && &*I != UserInst; ++I) + /*empty*/; + + return &*I != UserInst; +} + +bool DominatorTree::isReachableFromEntry(const Use &U) const { + Instruction *I = dyn_cast<Instruction>(U.getUser()); + + // ConstantExprs aren't really reachable from the entry block, but they + // don't need to be treated like unreachable code either. + if (!I) return true; + + // PHI nodes use their operands on their incoming edges. + if (PHINode *PN = dyn_cast<PHINode>(I)) + return isReachableFromEntry(PN->getIncomingBlock(U)); + + // Everything else uses their operands in their own block. + return isReachableFromEntry(I->getParent()); +} diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 8db6ac9..6c5db32 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -2003,6 +2003,23 @@ bool BinaryOperator::isExact() const { } //===----------------------------------------------------------------------===// +// FPMathOperator Class +//===----------------------------------------------------------------------===// + +/// getFPAccuracy - Get the maximum error permitted by this operation in ULPs. +/// An accuracy of 0.0 means that the operation should be performed with the +/// default precision. +float FPMathOperator::getFPAccuracy() const { + const MDNode *MD = + cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath); + if (!MD) + return 0.0; + ConstantFP *Accuracy = cast<ConstantFP>(MD->getOperand(0)); + return Accuracy->getValueAPF().convertToFloat(); +} + + +//===----------------------------------------------------------------------===// // CastInst Class //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index d77e996..f07f0b3 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -44,10 +44,15 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { unsigned ProfID = getMDKindID("prof"); assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID; - // Create the 'fpaccuracy' metadata kind. - unsigned FPAccuracyID = getMDKindID("fpaccuracy"); - assert(FPAccuracyID == MD_fpaccuracy && "fpaccuracy kind id drifted"); + // Create the 'fpmath' metadata kind. + unsigned FPAccuracyID = getMDKindID("fpmath"); + assert(FPAccuracyID == MD_fpmath && "fpmath kind id drifted"); (void)FPAccuracyID; + + // Create the 'range' metadata kind. 
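The use-based dominance query added above is exactly what the ObjCARCContract change earlier in this patch relies on: a PHI operand is tested on its incoming edge rather than in the PHI's own block, and unreachable self-dominating code is filtered out first. The intended call pattern:

    // Rewrite use U to Replacement (computed by Inst) only when the
    // definition dominates that particular use.
    if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U))
      U.set(Replacement);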
+ unsigned RangeID = getMDKindID("range"); + assert(RangeID == MD_range && "range kind id drifted"); + (void)RangeID; } LLVMContext::~LLVMContext() { delete pImpl; } diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index f98526d..2252028 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -194,6 +194,26 @@ struct FunctionTypeKeyInfo { } }; +// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a +// shortcut to avoid comparing all operands. +template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> { + static bool Equals(const MDNode &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID) { + assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?"); + // First, check if the cached hashes match. If they don't we can skip the + // expensive operand walk. + if (X.Hash != IDHash) + return false; + + // If they match we have to compare the operands. + X.Profile(TempID); + return TempID == ID; + } + static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) { + return X.Hash; // Return cached hash. + } +}; + /// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps /// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp. class DebugRecVH : public CallbackVH { @@ -234,7 +254,7 @@ public: DenseMapAPFloatKeyInfo> FPMapTy; FPMapTy FPConstants; - StringMap<MDString*> MDStringCache; + StringMap<Value*> MDStringCache; FoldingSet<MDNode> MDNodeSet; // MDNodes may be uniqued or not uniqued. When they're not uniqued, they diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 0fc2a25..090b09a 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -31,16 +31,17 @@ using namespace llvm; void MDString::anchor() { } -MDString::MDString(LLVMContext &C, StringRef S) - : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {} +MDString::MDString(LLVMContext &C) + : Value(Type::getMetadataTy(C), Value::MDStringVal) {} MDString *MDString::get(LLVMContext &Context, StringRef Str) { LLVMContextImpl *pImpl = Context.pImpl; - StringMapEntry<MDString *> &Entry = + StringMapEntry<Value*> &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); - MDString *&S = Entry.getValue(); - if (!S) S = new MDString(Context, Entry.getKey()); - return S; + Value *&S = Entry.getValue(); + if (!S) S = new MDString(Context); + S->setValueName(&Entry); + return cast<MDString>(S); } //===----------------------------------------------------------------------===// @@ -50,14 +51,26 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) { // Use CallbackVH to hold MDNode operands. namespace llvm { class MDNodeOperand : public CallbackVH { - MDNode *Parent; + MDNode *getParent() { + MDNodeOperand *Cur = this; + + while (Cur->getValPtrInt() != 1) + --Cur; + + assert(Cur->getValPtrInt() == 1 && + "Couldn't find the beginning of the operand list!"); + return reinterpret_cast<MDNode*>(Cur) - 1; + } + public: - MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {} + MDNodeOperand(Value *V) : CallbackVH(V) {} ~MDNodeOperand() {} - void set(Value *V) { - setValPtr(V); - } + void set(Value *V) { this->setValPtr(V); } + + /// setAsFirstOperand - Accessor method to mark the operand as the first in + /// the list. 
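+  /// (Operands are co-allocated in a single block directly after their
+  /// owning MDNode, so getParent() above can recover the node by walking
+  /// back to the operand tagged as first and stepping over it, instead of
+  /// spending a Parent pointer in every operand.)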
+ void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); } virtual void deleted(); virtual void allUsesReplacedWith(Value *NV); @@ -66,15 +79,13 @@ public: void MDNodeOperand::deleted() { - Parent->replaceOperand(this, 0); + getParent()->replaceOperand(this, 0); } void MDNodeOperand::allUsesReplacedWith(Value *NV) { - Parent->replaceOperand(this, NV); + getParent()->replaceOperand(this, NV); } - - //===----------------------------------------------------------------------===// // MDNode implementation. // @@ -102,8 +113,13 @@ MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal) // Initialize the operand list, which is co-allocated on the end of the node. unsigned i = 0; for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; - Op != E; ++Op, ++i) - new (Op) MDNodeOperand(Vals[i], this); + Op != E; ++Op, ++i) { + new (Op) MDNodeOperand(Vals[i]); + + // Mark the first MDNodeOperand as being the first in the list of operands. + if (i == 0) + Op->setAsFirstOperand(1); + } } @@ -205,11 +221,11 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals, ID.AddPointer(Vals[i]); void *InsertPoint; - MDNode *N = NULL; - - if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint))) + MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); + + if (N || !Insert) return N; - + bool isFunctionLocal = false; switch (FL) { case FL_Unknown: @@ -234,6 +250,9 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals, void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); N = new (Ptr) MDNode(Context, Vals, isFunctionLocal); + // Cache the operand hash. + N->Hash = ID.ComputeHash(); + // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(N, InsertPoint); @@ -357,6 +376,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { return; } + // Cache the operand hash. + Hash = ID.ComputeHash(); // InsertPoint will have been set by the FindNodeOrInsertPos call. pImpl->MDNodeSet.InsertNode(this, InsertPoint); @@ -551,17 +572,15 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned, getContext().pImpl->MetadataStore.count(this) && "Shouldn't have called this"); const LLVMContextImpl::MDMapTy &Info = - getContext().pImpl->MetadataStore.find(this)->second; + getContext().pImpl->MetadataStore.find(this)->second; assert(!Info.empty() && "Shouldn't have called this"); - Result.append(Info.begin(), Info.end()); - + // Sort the resulting array so it is stable. if (Result.size() > 1) array_pod_sort(Result.begin(), Result.end()); } - /// clearMetadataHashEntries - Clear all hashtable-based metadata from /// this instruction. void Instruction::clearMetadataHashEntries() { diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index e8bc6db..3c67191 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -434,7 +434,7 @@ bool Module::MaterializeAllPermanently(std::string *ErrInfo) { // -// dropAllReferences() - This function causes all the subelementss to "let go" +// dropAllReferences() - This function causes all the subelements to "let go" // of all references that they are maintaining. This allows one to 'delete' a // whole module at a time, even though there may be circular references... first // all references are dropped, and all use counts go to zero. 
Then everything diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 773862d..28fbaa6 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -14,7 +14,6 @@ #include "llvm/PassManagers.h" #include "llvm/PassManager.h" -#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" @@ -26,7 +25,6 @@ #include "llvm/Support/PassNameParser.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" -#include "llvm/ADT/StringMap.h" #include <algorithm> #include <map> using namespace llvm; @@ -422,20 +420,6 @@ char PassManagerImpl::ID = 0; namespace { //===----------------------------------------------------------------------===// -// DebugInfoProbe - -static DebugInfoProbeInfo *TheDebugProbe; -static void createDebugInfoProbe() { - if (TheDebugProbe) return; - - // Constructed the first time this is called. This guarantees that the - // object will be constructed, if -enable-debug-info-probe is set, - // before static globals, thus it will be destroyed before them. - static ManagedStatic<DebugInfoProbeInfo> DIP; - TheDebugProbe = &*DIP; -} - -//===----------------------------------------------------------------------===// /// TimingInfo Class - This class is used to calculate information about the /// amount of time each pass takes to execute. This only happens when /// -time-passes is enabled on the command line. @@ -1440,7 +1424,6 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() { bool FunctionPassManagerImpl::run(Function &F) { bool Changed = false; TimingInfo::createTheTimeInfo(); - createDebugInfoProbe(); initializeAllAnalysisInfo(); for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) @@ -1488,16 +1471,13 @@ bool FPPassManager::runOnFunction(Function &F) { dumpRequiredSet(FP); initializeAnalysisImpl(FP); - if (TheDebugProbe) - TheDebugProbe->initialize(FP, F); + { PassManagerPrettyStackEntry X(FP, F); TimeRegion PassTimer(getPassTimer(FP)); LocalChanged |= FP->runOnFunction(F); } - if (TheDebugProbe) - TheDebugProbe->finalize(FP, F); Changed |= LocalChanged; if (LocalChanged) @@ -1647,7 +1627,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){ bool PassManagerImpl::run(Module &M) { bool Changed = false; TimingInfo::createTheTimeInfo(); - createDebugInfoProbe(); dumpArguments(); dumpPasses(); diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp index 359a151..0128adc 100644 --- a/lib/VMCore/Use.cpp +++ b/lib/VMCore/Use.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Value.h" +#include <new> namespace llvm { diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 41cc38c..4006b2c 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -76,7 +76,7 @@ Value::~Value() { // If this value is named, destroy the name. This should not be in a symtab // at this point. - if (Name) + if (Name && SubclassID != MDStringVal) Name->Destroy(); // There should be no uses of this object anymore, remove it. @@ -170,6 +170,9 @@ StringRef Value::getName() const { } void Value::setName(const Twine &NewName) { + assert(SubclassID != MDStringVal && + "Cannot set the name of MDString with this method!"); + // Fast path for common IRBuilder case of setName("") when there is no name. 
if (NewName.isTriviallyEmpty() && !hasName()) return; @@ -228,6 +231,8 @@ void Value::setName(const Twine &NewName) { /// takeName - transfer the name from V to this value, setting V's name to /// empty. It is an error to call V->takeName(V). void Value::takeName(Value *V) { + assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!"); + ValueSymbolTable *ST = 0; // If this value has a name, drop it. if (hasName()) { @@ -477,7 +482,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) { setPrevPtr(List); if (Next) { Next->setPrevPtr(&Next); - assert(VP == Next->VP && "Added to wrong list?"); + assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?"); } } @@ -493,14 +498,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) { /// AddToUseList - Add this ValueHandle to the use list for VP. void ValueHandleBase::AddToUseList() { - assert(VP && "Null pointer doesn't have a use list!"); + assert(VP.getPointer() && "Null pointer doesn't have a use list!"); - LLVMContextImpl *pImpl = VP->getContext().pImpl; + LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl; - if (VP->HasValueHandle) { + if (VP.getPointer()->HasValueHandle) { // If this value already has a ValueHandle, then it must be in the // ValueHandles map already. - ValueHandleBase *&Entry = pImpl->ValueHandles[VP]; + ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()]; assert(Entry != 0 && "Value doesn't have any handles?"); AddToExistingUseList(&Entry); return; @@ -514,10 +519,10 @@ void ValueHandleBase::AddToUseList() { DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles; const void *OldBucketPtr = Handles.getPointerIntoBucketsArray(); - ValueHandleBase *&Entry = Handles[VP]; + ValueHandleBase *&Entry = Handles[VP.getPointer()]; assert(Entry == 0 && "Value really did already have handles?"); AddToExistingUseList(&Entry); - VP->HasValueHandle = true; + VP.getPointer()->HasValueHandle = true; // If reallocation didn't happen or if this was the first insertion, don't // walk the table. @@ -529,14 +534,16 @@ void ValueHandleBase::AddToUseList() { // Okay, reallocation did happen. Fix the Prev Pointers. for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(), E = Handles.end(); I != E; ++I) { - assert(I->second && I->first == I->second->VP && "List invariant broken!"); + assert(I->second && I->first == I->second->VP.getPointer() && + "List invariant broken!"); I->second->setPrevPtr(&I->second); } } /// RemoveFromUseList - Remove this ValueHandle from its current use list. void ValueHandleBase::RemoveFromUseList() { - assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!"); + assert(VP.getPointer() && VP.getPointer()->HasValueHandle && + "Pointer doesn't have a use list!"); // Unlink this from its use list. ValueHandleBase **PrevPtr = getPrevPtr(); @@ -552,11 +559,11 @@ void ValueHandleBase::RemoveFromUseList() { // If the Next pointer was null, then it is possible that this was the last // ValueHandle watching VP. If so, delete its entry from the ValueHandles // map. 
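+  // (VP is now a PointerIntPair<Value*, 2>: the spare low bits back the
+  // getValPtrInt()/setValPtrInt() tag that MDNodeOperand uses to mark the
+  // first operand, so every raw use of the Value* goes through
+  // getPointer().)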
- LLVMContextImpl *pImpl = VP->getContext().pImpl; + LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl; DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles; if (Handles.isPointerIntoBucketsArray(PrevPtr)) { - Handles.erase(VP); - VP->HasValueHandle = false; + Handles.erase(VP.getPointer()); + VP.getPointer()->HasValueHandle = false; } } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 5b9b2a5..47baef3 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -51,6 +51,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/InlineAsm.h" #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/Pass.h" @@ -117,7 +118,6 @@ namespace { struct Verifier : public FunctionPass, public InstVisitor<Verifier> { static char ID; // Pass ID, replacement for typeid bool Broken; // Is this module found to be broken? - bool RealPass; // Are we not being run by a PassManager? VerifierFailureAction action; // What to do if verification fails. Module *Mod; // Module we are verifying right now @@ -143,13 +143,13 @@ namespace { const Value *PersonalityFn; Verifier() - : FunctionPass(ID), Broken(false), RealPass(true), + : FunctionPass(ID), Broken(false), action(AbortProcessAction), Mod(0), Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) { initializeVerifierPass(*PassRegistry::getPassRegistry()); } explicit Verifier(VerifierFailureAction ctn) - : FunctionPass(ID), Broken(false), RealPass(true), action(ctn), Mod(0), + : FunctionPass(ID), Broken(false), action(ctn), Mod(0), Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) { initializeVerifierPass(*PassRegistry::getPassRegistry()); } @@ -158,17 +158,14 @@ namespace { Mod = &M; Context = &M.getContext(); - // If this is a real pass, in a pass manager, we must abort before - // returning back to the pass manager, or else the pass manager may try to - // run other passes on the broken module. - if (RealPass) - return abortIfBroken(); - return false; + // We must abort before returning back to the pass manager, or else the + // pass manager may try to run other passes on the broken module. + return abortIfBroken(); } bool runOnFunction(Function &F) { // Get dominator information if we are being run by PassManager - if (RealPass) DT = &getAnalysis<DominatorTree>(); + DT = &getAnalysis<DominatorTree>(); Mod = F.getParent(); if (!Context) Context = &F.getContext(); @@ -177,13 +174,9 @@ namespace { InstsInThisBlock.clear(); PersonalityFn = 0; - // If this is a real pass, in a pass manager, we must abort before - // returning back to the pass manager, or else the pass manager may try to - // run other passes on the broken module. - if (RealPass) - return abortIfBroken(); - - return false; + // We must abort before returning back to the pass manager, or else the + // pass manager may try to run other passes on the broken module. 
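+    // (Verification outside a pass manager goes through verifyFunction()
+    // and verifyModule(), which build a pass manager of their own, so the
+    // old RealPass escape hatch is no longer needed.)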
+ return abortIfBroken(); } bool doFinalization(Module &M) { @@ -214,8 +207,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredID(PreVerifyID); - if (RealPass) - AU.addRequired<DominatorTree>(); + AU.addRequired<DominatorTree>(); } /// abortIfBroken - If the module is broken and we are supposed to abort on @@ -1369,6 +1361,25 @@ void Verifier::visitLoadInst(LoadInst &LI) { Assert1(LI.getSynchScope() == CrossThread, "Non-atomic load cannot have SynchronizationScope specified", &LI); } + + if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) { + unsigned NumOperands = Range->getNumOperands(); + Assert1(NumOperands % 2 == 0, "Unfinished range!", Range); + unsigned NumRanges = NumOperands / 2; + Assert1(NumRanges >= 1, "It should have at least one range!", Range); + for (unsigned i = 0; i < NumRanges; ++i) { + ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i)); + Assert1(Low, "The lower limit must be an integer!", Low); + ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1)); + Assert1(High, "The upper limit must be an integer!", High); + Assert1(High->getType() == Low->getType() && + High->getType() == ElTy, "Range types must match load type!", + &LI); + Assert1(High->getValue() != Low->getValue(), "Range must not be empty!", + Range); + } + } + visitInstruction(LI); } @@ -1641,6 +1652,24 @@ void Verifier::visitInstruction(Instruction &I) { "Cannot take the address of an inline asm!", &I); } } + + if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpmath)) { + Assert1(I.getType()->isFPOrFPVectorTy(), + "fpmath requires a floating point result!", &I); + Assert1(MD->getNumOperands() == 1, "fpmath takes one operand!", &I); + Value *Op0 = MD->getOperand(0); + if (ConstantFP *CFP0 = dyn_cast_or_null<ConstantFP>(Op0)) { + APFloat Accuracy = CFP0->getValueAPF(); + Assert1(Accuracy.isNormal() && !Accuracy.isNegative(), + "fpmath accuracy not a positive number!", &I); + } else { + Assert1(false, "invalid fpmath accuracy!", &I); + } + } + + MDNode *MD = I.getMetadata(LLVMContext::MD_range); + Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I); + InstsInThisBlock.insert(&I); }
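The two verifier hunks above pin down the shape of the new metadata: !fpmath carries a single positive ConstantFP accuracy in ULPs, while !range carries one or more [Low, High) ConstantInt pairs of the loaded type and may only appear on loads. A minimal sketch of builder-side code that satisfies both checks, assuming a tree contemporary with this merge (old pre-llvm/IR header paths); markByteRange and addFpmath are illustrative helper names, not LLVM API:

#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Type.h"
using namespace llvm;

// Tag an i8 load as producing only ASCII, i.e. the half-open range
// [0, 128). Low and High must be ConstantInts of the loaded type and the
// range must not be empty, exactly as visitLoadInst now verifies.
static void markByteRange(LoadInst *LI) {
  LLVMContext &Ctx = LI->getContext();
  Type *I8 = Type::getInt8Ty(Ctx);
  Value *Pair[] = { ConstantInt::get(I8, 0), ConstantInt::get(I8, 128) };
  LI->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, Pair));
}

// Permit 2.5 ULPs of error on a floating-point instruction. The verifier
// demands exactly one operand, a normal, non-negative ConstantFP, and a
// floating-point result type on the tagged instruction.
static void addFpmath(Instruction *FPOp) {
  LLVMContext &Ctx = FPOp->getContext();
  Value *Acc = ConstantFP::get(Type::getFloatTy(Ctx), 2.5);
  FPOp->setMetadata(LLVMContext::MD_fpmath, MDNode::get(Ctx, Acc));
}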
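The MDNodeOperand::getParent() change in the Metadata.cpp hunk is an instance of a general co-allocation trick: when children live in the same allocation immediately after their parent, a one-bit "first child" tag replaces a back-pointer in every child. Below is a toy, self-contained sketch of the pattern; Parent, Operand and createParent are made-up names, and a plain bool stands in for the pointer low bit that the real code keeps in its CallbackVH:

#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <new>

struct Operand;

struct Parent {
  std::size_t NumOperands;
  // The operands start immediately after this object in memory.
  Operand *op_begin() { return reinterpret_cast<Operand *>(this + 1); }
};

struct Operand {
  void *Val;
  bool IsFirst; // plays the role of getValPtrInt() == 1 above

  Parent *getParent() {
    Operand *Cur = this;
    while (!Cur->IsFirst)
      --Cur;                                    // scan back to the first operand
    return reinterpret_cast<Parent *>(Cur) - 1; // the parent sits just before it
  }
};

// Co-allocate a Parent and its N operands in one block, as
// MDNode::getMDNode does with malloc plus placement new.
static Parent *createParent(std::size_t N) {
  void *Mem = std::malloc(sizeof(Parent) + N * sizeof(Operand));
  Parent *P = new (Mem) Parent();
  P->NumOperands = N;
  for (std::size_t i = 0; i != N; ++i) {
    Operand *Op = new (P->op_begin() + i) Operand();
    Op->Val = 0;
    Op->IsFirst = (i == 0);
  }
  return P;
}

int main() {
  Parent *P = createParent(3);
  assert(P->op_begin()[2].getParent() == P); // any operand finds its parent
  std::free(P);
  return 0;
}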