Diffstat (limited to 'lib')
172 files changed, 3947 insertions, 1996 deletions
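A change that recurs throughout the hunks below is the replacement of Value::getNameStr() with getName(), which returns a lightweight StringRef instead of a std::string. The sketch below (illustrative, not part of the commit) shows the resulting idioms: a StringRef streams directly into a raw_ostream, and .str() makes an owned copy only where a std::string is actually required.

#include "llvm/Function.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

// Illustrative helper showing the getName()/StringRef idioms used below.
static void reportFunction(const Function &F) {
  StringRef Name = F.getName();            // no copy: pointer + length
  errs() << "Working on " << Name << "\n"; // raw_ostream accepts StringRef
  // Building a std::string still requires an explicit copy via .str():
  std::string Filename = "cfg." + Name.str() + ".dot";
  errs() << "Writing '" << Filename << "'...\n";
}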
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index af400ba..568983a 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -706,8 +706,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && - !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 258fe54..f9461c0 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -480,8 +480,8 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { void BranchProbabilityInfo:: setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) { Weights[std::make_pair(Src, Dst)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " - << Dst->getNameStr() << " weight to " << Weight + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " + << Dst->getName() << " weight to " << Weight << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); } @@ -501,7 +501,7 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, const BasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); - OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + OS << "edge " << Src->getName() << " -> " << Dst->getName() << " probability is " << Prob << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 7bb063f..7685400 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -77,7 +77,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -111,7 +111,7 @@ namespace { } virtual bool runOnFunction(Function &F) { - std::string Filename = "cfg." + F.getNameStr() + ".dot"; + std::string Filename = "cfg." + F.getName().str() + ".dot"; errs() << "Writing '" << Filename << "'..."; std::string ErrorInfo; @@ -143,7 +143,7 @@ INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", /// being a 'dot' and 'gv' program in your path. /// void Function::viewCFG() const { - ViewGraph(this, "cfg" + getNameStr()); + ViewGraph(this, "cfg" + getName()); } /// viewCFGOnly - This function is meant for use from the debugger. It works @@ -152,7 +152,7 @@ void Function::viewCFG() const { /// his can make the graph smaller. 
/// void Function::viewCFGOnly() const { - ViewGraph(this, "cfg" + getNameStr(), true); + ViewGraph(this, "cfg" + getName(), true); } FunctionPass *llvm::createCFGPrinterPass () { diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index a84dafb..9a7992e 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -19,8 +19,10 @@ #include "llvm/Analysis/CaptureTracking.h" using namespace llvm; +CaptureTracker::~CaptureTracker() {} + namespace { - struct SimpleCaptureTracker { + struct SimpleCaptureTracker : public CaptureTracker { explicit SimpleCaptureTracker(bool ReturnCaptures) : ReturnCaptures(ReturnCaptures), Captured(false) {} @@ -51,6 +53,9 @@ namespace { /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { + assert(!isa<GlobalValue>(V) && + "It doesn't make sense to ask whether a global is captured."); + // TODO: If StoreCaptures is not true, we could do Fancy analysis // to determine whether this store is not actually an escape point. // In that case, BasicAliasAnalysis should be updated as well to @@ -58,6 +63,111 @@ bool llvm::PointerMayBeCaptured(const Value *V, (void)StoreCaptures; SimpleCaptureTracker SCT(ReturnCaptures); - PointerMayBeCaptured(V, SCT); + PointerMayBeCaptured(V, &SCT); return SCT.Captured; } + +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); + SmallVector<Use*, Threshold> Worklist; + SmallSet<Use*, Threshold> Visited; + int Count = 0; + + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return Tracker->tooManyUses(); + + Use *U = &UI.getUse(); + if (!Tracker->shouldExplore(U)) continue; + Visited.insert(U); + Worklist.push_back(U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast<Instruction>(U->getUser()); + V = U->get(); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). + if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) + break; + + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). + CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.doesNotCapture(A - B)) + // The parameter is not marked 'nocapture' - captured. 
+ if (Tracker->captured(I)) + return; + break; + } + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - conservatively assume it may be captured. + if (Tracker->captured(I)) + return; + // Storing to the pointee does not cause the pointer to be captured. + break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // The original value is not captured via this if the new value isn't. + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + if (Visited.insert(U)) + if (Tracker->shouldExplore(U)) + Worklist.push_back(U); + } + break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (isNoAliasCall(V->stripPointerCasts())) + if (ConstantPointerNull *CPN = + dyn_cast<ConstantPointerNull>(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + break; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + if (Tracker->captured(I)) + return; + break; + default: + // Something else - be conservative and say it is captured. + if (Tracker->captured(I)) + return; + break; + } + } + + // All uses examined. +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 85aacca..c7833bf 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -477,21 +477,19 @@ void UnloopUpdater::updateBlockParents() { /// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below /// their new parents. void UnloopUpdater::removeBlocksFromAncestors() { - // Remove unloop's blocks from all ancestors below their new parents. + // Remove all unloop's blocks (including those in nested subloops) from + // ancestors below the new parent loop. for (Loop::block_iterator BI = Unloop->block_begin(), BE = Unloop->block_end(); BI != BE; ++BI) { - Loop *NewParent = LI->getLoopFor(*BI); - // If this block is an immediate subloop, remove all blocks (including - // nested subloops) from ancestors below the new parent loop. - // Otherwise, if this block is in a nested subloop, skip it. - if (SubloopParents.count(NewParent)) - NewParent = SubloopParents[NewParent]; - else if (Unloop->contains(NewParent)) - continue; - + Loop *OuterParent = LI->getLoopFor(*BI); + if (Unloop->contains(OuterParent)) { + while (OuterParent->getParentLoop() != Unloop) + OuterParent = OuterParent->getParentLoop(); + OuterParent = SubloopParents[OuterParent]; + } // Remove blocks from former Ancestors except Unloop itself which will be // deleted. 
- for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent; + for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent; OldParent = OldParent->getParentLoop()) { assert(OldParent && "new loop is not an ancestor of the original"); OldParent->removeBlockFromLoop(*BI); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8d451c4..b145650 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -48,10 +48,10 @@ static bool isMallocCall(const CallInst *CI) { // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); - if (FTy->getNumParams() != 1) - return false; - return FTy->getParamType(0)->isIntegerTy(32) || - FTy->getParamType(0)->isIntegerTy(64); + return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) && + FTy->getNumParams() == 1 && + (FTy->getParamType(0)->isIntegerTy(32) || + FTy->getParamType(0)->isIntegerTy(64)); } /// extractMallocCall - Returns the corresponding CallInst if the instruction diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 323c84f..704e27b 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -336,7 +336,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, namespace { /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. - struct CapturesBefore { + struct CapturesBefore : public CaptureTracker { CapturesBefore(const Instruction *I, DominatorTree *DT) : BeforeHere(I), DT(DT), Captured(false) {} @@ -375,13 +375,13 @@ MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, // with a smarter AA in place, this test is just wasting compile time. if (!DT) return AliasAnalysis::ModRef; const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); - if (!isIdentifiedObject(Object) || isa<GlobalVariable>(Object)) + if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object)) return AliasAnalysis::ModRef; ImmutableCallSite CS(Inst); if (!CS.getInstruction()) return AliasAnalysis::ModRef; CapturesBefore CB(Inst, DT); - llvm::PointerMayBeCaptured(Object, CB); + llvm::PointerMayBeCaptured(Object, &CB); if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured) return AliasAnalysis::ModRef; @@ -393,8 +393,7 @@ MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, // pointer were passed to arguments that were neither of these, then it // couldn't be no-capture. 
if (!(*CI)->getType()->isPointerTy() || - (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && - !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo))) continue; // If this is a no-capture pointer argument, see if we can tell that it diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index 0ae734e..0fcdfe7 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -137,22 +137,22 @@ bool PathProfileVerifier::runOnModule (Module &M) { BasicBlock* source = nextEdge->getSource(); BasicBlock* target = nextEdge->getTarget(); unsigned duplicateNumber = nextEdge->getDuplicateNumber(); - DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber - << "}--> " << target->getNameStr()); + DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber + << "}--> " << target->getName()); // Ensure all the referenced edges exist // TODO: make this a separate function if( !arrayMap.count(source) ) { - errs() << " error [" << F->getNameStr() << "()]: source '" - << source->getNameStr() + errs() << " error [" << F->getName() << "()]: source '" + << source->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source].count(target) ) { - errs() << " error [" << F->getNameStr() << "()]: target '" - << target->getNameStr() + errs() << " error [" << F->getName() << "()]: target '" + << target->getName() << "' does not exist in the array map.\n"; } else if( !arrayMap[source][target].count(duplicateNumber) ) { - errs() << " error [" << F->getNameStr() << "()]: edge " - << source->getNameStr() << " -> " << target->getNameStr() + errs() << " error [" << F->getName() << "()]: edge " + << source->getName() << " -> " << target->getName() << " duplicate number " << duplicateNumber << " does not exist in the array map.\n"; } else { diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index b594e2b..63468f8 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -332,7 +332,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) { // Clear Minimal Edges. MinimalWeight.clear(); - DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n"); + DEBUG(dbgs() << "Working on function " << F.getName() << "\n"); // Since the entry block is the first one and has no predecessors, the edge // (0,entry) is inserted with the starting weight of 1. 
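The CaptureTracking and MemoryDependenceAnalysis hunks above replace the old boolean-only capture query with a callback interface: PointerMayBeCaptured(V, Tracker) walks the uses of V and reports through the CaptureTracker virtual methods tooManyUses(), shouldExplore(Use*), and captured(Instruction*), where returning true from captured() stops the walk. A hedged sketch of a tracker a client might write; CapturingInstCollector is an invented name, only the three callbacks come from the patch.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Hypothetical tracker that collects every capturing instruction instead
// of stopping at the first one, as SimpleCaptureTracker does.
struct CapturingInstCollector : public CaptureTracker {
  SmallVector<Instruction*, 8> Captures;
  bool TooManyUses;

  CapturingInstCollector() : TooManyUses(false) {}

  void tooManyUses() { TooManyUses = true; }  // walk gave up; assume captured
  bool shouldExplore(Use *U) { return true; } // visit every use
  bool captured(Instruction *I) {
    Captures.push_back(I); // remember where the pointer escapes
    return false;          // returning false keeps the walk going
  }
};

// Usage, mirroring the PointerMayBeCaptured(Object, &CB) call site above:
//   CapturingInstCollector C;
//   PointerMayBeCaptured(V, &C);
//   bool MayBeCaptured = C.TooManyUses || !C.Captures.empty();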
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 098079b..c4da807 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -160,7 +160,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -181,7 +181,7 @@ bool LoaderPass::runOnModule(Module &M) { ReadCount = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getName() << "\n"); readEdge(getEdge(0,&F->getEntryBlock()), Counters); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index a017518..379d79c 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -125,8 +125,8 @@ namespace llvm { outCount++; } } - dbgs() << "Block " << BB->getNameStr() << " in " - << BB->getParent()->getNameStr() << ":" + dbgs() << "Block " << BB->getName() << " in " + << BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",BBWeight) << "," << "inWeight=" << format("%20.20g",inWeight) << "," << "inCount=" << inCount << "," @@ -143,8 +143,8 @@ namespace llvm { template<class FType, class BType> void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) { - dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " - << DI->BB->getParent()->getNameStr() << ":" + dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in " + << DI->BB->getParent()->getName() << ":" << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," << "inWeight=" << format("%20.20g",DI->inWeight) << "," << "inCount=" << DI->inCount << "," @@ -201,13 +201,13 @@ namespace llvm { double EdgeWeight = PI->getEdgeWeight(E); if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has missing value"); return 0; } else { if (EdgeWeight < 0) { dbgs() << "Edge " << E << " in Function " - << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": "; + << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": "; ASSERTMESSAGE("Edge has negative value"); } return EdgeWeight; @@ -220,8 +220,8 @@ namespace llvm { DetailedBlockInfo *DI) { if (Error) { DEBUG(debugEntry(DI)); - dbgs() << "Block " << DI->BB->getNameStr() << " in Function " - << DI->BB->getParent()->getNameStr() << ": "; + dbgs() << "Block " << DI->BB->getName() << " in Function " + << DI->BB->getParent()->getName() << ": "; ASSERTMESSAGE(Message); } return; diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 52753cb..828913d 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -186,18 +186,16 @@ std::string Region::getNameStr() const { raw_string_ostream OS(entryName); WriteAsOperand(OS, getEntry(), false); - entryName = OS.str(); } else - 
entryName = getEntry()->getNameStr(); + entryName = getEntry()->getName(); if (getExit()) { if (getExit()->getName().empty()) { raw_string_ostream OS(exitName); WriteAsOperand(OS, getExit(), false); - exitName = OS.str(); } else - exitName = getExit()->getNameStr(); + exitName = getExit()->getName(); } else exitName = "<Function Return>"; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ac00259..622b214 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3601,9 +3601,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V); if (OBO->hasNoSignedWrap()) - setFlags(Flags, SCEV::FlagNSW); + Flags = setFlags(Flags, SCEV::FlagNSW); if (OBO->hasNoUnsignedWrap()) - setFlags(Flags, SCEV::FlagNUW); + Flags = setFlags(Flags, SCEV::FlagNUW); return getAddExpr(AddOps, Flags); } case Instruction::Mul: { @@ -4153,13 +4153,19 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. If all exits are computable, this is the minimum computed count. +/// exits. A computable result can only be returned for loops with a single exit. +/// Returning the minimum taken count among all exits is incorrect because one +/// of the loop's exit limits may have been skipped. HowFarToZero assumes that +/// the limit of each loop test is never skipped. This is a valid assumption as +/// long as the loop exits via that test. For precise results, it is the +/// caller's responsibility to specify the relevant loop exit using +/// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { // If any exits were not computable, the loop is not computable. if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute(); - // We need at least one computable exit. + // We need exactly one computable exit. if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); @@ -4171,8 +4177,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!BECount) BECount = ENT->ExactNotTaken; - else - BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken); + else if (BECount != ENT->ExactNotTaken) + return SE->getCouldNotCompute(); } assert(BECount && "Invalid not taken count for loop exit"); return BECount; @@ -4253,8 +4259,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { if (MaxBECount == getCouldNotCompute()) MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max); + else if (EL.Max != getCouldNotCompute()) { + // We cannot take the "min" MaxBECount, because non-unit stride loops may + // skip some loop tests. Taking the max over the exits is sufficiently + // conservative. TODO: We could do better taking into consideration + // that (1) the loop has unit stride (2) the last loop test is + // less-than/greater-than (3) any loop test is less-than/greater-than AND + // falls-through some constant times less than the other tests. 
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + } } return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); @@ -4920,7 +4933,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, @@ -5507,10 +5520,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // behavior. Loops must exhibit defined behavior until a wrapped value is // actually used. So the trip count computed by udiv could be smaller than the // number of well-defined iterations. - if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) { // FIXME: We really want an "isexact" bit for udiv. return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); - + } // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index d8c207b..035bcea 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -327,13 +327,13 @@ void SparseSolver::Solve(Function &F) { } void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getNameStr() << "\n"; + OS << "\nFUNCTION: " << F.getName() << "\n"; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (!BBExecutable.count(BB)) OS << "INFEASIBLE: "; OS << "\t"; if (BB->hasName()) - OS << BB->getNameStr() << ":\n"; + OS << BB->getName() << ":\n"; else OS << "; anon bb\n"; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 68a39cd..ff5010b 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -34,7 +34,7 @@ Module *Trace::getModule() const { /// void Trace::print(raw_ostream &O) const { Function *F = getFunction(); - O << "; Trace from function " << F->getNameStr() << ", blocks:\n"; + O << "; Trace from function " << F->getName() << ", blocks:\n"; for (const_iterator i = begin(), e = end(); i != e; ++i) { O << "; "; WriteAsOperand(O, *i, true, getModule()); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 9f7b5b5..22f1c14 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -248,9 +248,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; - if (isKnownNonNegative) + // Only make use of no-wrap flags if we failed to compute the sign bit + // directly. This matters if the multiplication always overflows, in + // which case we prefer to follow the result of the direct computation, + // though as the program is invoking undefined behaviour we can choose + // whatever we like here. 
+ if (isKnownNonNegative && !KnownOne.isNegative()) KnownZero.setBit(BitWidth - 1); - else if (isKnownNegative) + else if (isKnownNegative && !KnownZero.isNegative()) KnownOne.setBit(BitWidth - 1); return; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a08b61c..711b796 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -749,6 +749,18 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.EmitRawText(StringRef("\tnop\n")); } + const Function *F = MF->getFunction(); + for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { + const BasicBlock *BB = i; + if (!BB->hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(BB); + if (Sym->isDefined()) + continue; + OutStreamer.AddComment("Address of block that was removed by CodeGen"); + OutStreamer.EmitLabel(Sym); + } + // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index a3a2488..6c77a63 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -60,7 +60,7 @@ void DwarfAccelTable::ComputeBucketCount(void) { uniques.resize(Data.size()); for (size_t i = 0, e = Data.size(); i < e; ++i) uniques[i] = Data[i]->HashValue; - std::sort(uniques.begin(), uniques.end()); + std::stable_sort(uniques.begin(), uniques.end()); std::vector<uint32_t>::iterator p = std::unique(uniques.begin(), uniques.end()); uint32_t num = std::distance(uniques.begin(), p); @@ -73,6 +73,15 @@ void DwarfAccelTable::ComputeBucketCount(void) { Header.hashes_count = num; } +namespace { + // DIESorter - comparison predicate that sorts DIEs by their offset. + struct DIESorter { + bool operator()(DIE *A, DIE *B) const { + return A->getOffset() < B->getOffset(); + } + }; +} + void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Create the individual hash data outputs. for (StringMap<DIEArray>::iterator @@ -80,7 +89,7 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { struct HashData *Entry = new HashData((*EI).getKeyData()); // Unique the entries. - std::sort((*EI).second.begin(), (*EI).second.end()); + std::stable_sort((*EI).second.begin(), (*EI).second.end(), DIESorter()); (*EI).second.erase(std::unique((*EI).second.begin(), (*EI).second.end()), (*EI).second.end()); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 159c096..237998a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1154,8 +1154,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); + uint64_t L = SR.getLo(); + uint64_t H = SR.getHi(); // The L value defines the lower bounds which is typically zero for C/C++. The // H value is the upper bounds. Values are 64 bit. 
H - L + 1 is the size @@ -1168,8 +1168,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) return; } if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index a7aba89..3bb0465 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -77,7 +77,7 @@ void EdgeBundles::view() const { /// Specialize WriteGraph, the standard implementation won't work. raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, bool ShortNames, - const std::string &Title) { + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 050edce..300f037 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -600,6 +600,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { while (!Regs.empty()) { if (!dv) { dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); continue; } @@ -617,9 +620,10 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } // dv is the DomainValue we are going to use for this instruction. - if (!dv) + if (!dv) { dv = alloc(); - dv->AvailableDomains = available; + dv->AvailableDomains = available; + } dv->Instrs.push_back(mi); // Finally set all defs and non-collapsed uses to dv. @@ -650,10 +654,11 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { bool anyregs = false; for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { - anyregs = true; - break; - } + for (const unsigned *AI = TRI->getOverlaps(*I); *AI; ++AI) + if (MF->getRegInfo().isPhysRegUsed(*AI)) { + anyregs = true; + break; + } if (!anyregs) return false; // Initialize the AliasMap on the first use. 
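The ExecutionDepsFix hunk just above widens the "is any register of this class used?" test: a physical register can be touched through an overlapping alias (writing EAX also clobbers AX, for instance), so each class member's full overlap list must be checked. Restated as a standalone helper under the same API assumptions (TRI->getOverlaps returning a zero-terminated list that includes the register itself):

#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

// Sketch of the alias-aware usage test: true if the function uses any
// register of RC, directly or via an overlapping register.
static bool anyRegOfClassUsed(const TargetRegisterClass *RC,
                              const TargetRegisterInfo *TRI,
                              const MachineRegisterInfo &MRI) {
  for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
       I != E; ++I)
    for (const unsigned *AI = TRI->getOverlaps(*I); *AI; ++AI)
      if (MRI.isPhysRegUsed(*AI))
        return true;
  return false;
}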
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index d757cf4..0281d05 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) { GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); - OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), RE = FD->roots_end(); RI != RE; ++RI) OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; - OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n"; + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; ++PI) { diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 3e69069..03b5693 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -114,9 +114,10 @@ EnableFastISelOption("fast-isel", cl::Hidden, LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : TargetMachine(T, Triple, CPU, FS) { - CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM); + CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); AsmInfo = T.createMCAsmInfo(Triple); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and @@ -130,11 +131,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + if (addCommonCodeGenPasses(PM, DisableVerify, Context)) return true; assert(Context != 0 && "Failed to get MCContext"); @@ -219,14 +219,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. MCContext *Ctx = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + if (addCommonCodeGenPasses(PM, DisableVerify, Ctx)) return true; - addCodeEmitter(PM, OptLevel, JCE); + addCodeEmitter(PM, JCE); PM.add(createGCInfoDeleter()); return false; // success! @@ -240,10 +239,9 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_ostream &Out, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + if (addCommonCodeGenPasses(PM, DisableVerify, Ctx)) return true; if (hasMCSaveTempLabels()) @@ -295,7 +293,6 @@ static void printAndVerify(PassManagerBase &PM, /// emitting to assembly files or machine code output. /// bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, bool DisableVerify, MCContext *&OutContext) { // Standard LLVM-Level Passes. 
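The LLVMTargetMachine hunks around this point stop threading a CodeGenOpt::Level argument through addPassesToEmitFile and the target hooks; the level is instead fixed when the TargetMachine is constructed (the new OL constructor parameter) and queried with getOptLevel(). A sketch of what a target-specific override might look like after the change; MyTargetMachine and the pass factory are invented for illustration.

// Before: virtual bool addInstSelector(PassManagerBase &PM,
//                                      CodeGenOpt::Level OptLevel);
// After: the hook drops the parameter and asks the machine itself.
bool MyTargetMachine::addInstSelector(PassManagerBase &PM) {
  // getOptLevel() returns the level the machine was constructed with.
  PM.add(createMyISelDagPass(*this, getOptLevel())); // hypothetical factory
  return false; // false means "no error", as in the hunks above
}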
@@ -313,7 +310,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createVerifierPass()); // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None && !DisableLSR) { + if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); if (PrintLSR) PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); @@ -349,12 +346,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, break; } - if (OptLevel != CodeGenOpt::None && !DisableCGP) + if (getOptLevel() != CodeGenOpt::None && !DisableCGP) PM.add(createCodeGenPreparePass(getTargetLowering())); PM.add(createStackProtectorPass(getTargetLowering())); - addPreISel(PM, OptLevel); + addPreISel(PM); if (PrintISelInput) PM.add(createPrintFunctionPass("\n\n" @@ -377,15 +374,16 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + PM.add(new MachineFunctionAnalysis(*this)); // Enable FastISel with -fast, but allow that to be overridden. if (EnableFastISelOption == cl::BOU_TRUE || - (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) + (getOptLevel() == CodeGenOpt::None && + EnableFastISelOption != cl::BOU_FALSE)) EnableFastISel = true; // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) + if (addInstSelector(PM)) return true; // Print the instruction selected machine code... @@ -395,21 +393,21 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createExpandISelPseudosPass()); // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { + if (getOptLevel() != CodeGenOpt::None && !DisableEarlyTailDup) { PM.add(createTailDuplicatePass(true)); printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); } // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - if (OptLevel != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. PM.add(createLocalStackSlotAllocationPass()); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack @@ -431,15 +429,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) + if (addPreRegAlloc(PM)) printAndVerify(PM, "After PreRegAlloc passes"); // Perform register allocation. - PM.add(createRegisterAllocator(OptLevel)); + PM.add(createRegisterAllocator(getOptLevel())); printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. if (!DisableSSC) @@ -453,7 +451,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run post-ra passes. 
- if (addPostRegAlloc(PM, OptLevel)) + if (addPostRegAlloc(PM)) printAndVerify(PM, "After PostRegAlloc passes"); PM.add(createExpandPostRAPseudosPass()); @@ -464,23 +462,23 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After PrologEpilogCodeInserter"); // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) + if (addPreSched2(PM)) printAndVerify(PM, "After PreSched2 passes"); // Second pass scheduler. - if (OptLevel != CodeGenOpt::None && !DisablePostRA) { - PM.add(createPostRAScheduler(OptLevel)); + if (getOptLevel() != CodeGenOpt::None && !DisablePostRA) { + PM.add(createPostRAScheduler(getOptLevel())); printAndVerify(PM, "After PostRAScheduler"); } // Branch folding must be run after regalloc and prolog/epilog insertion. - if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { + if (getOptLevel() != CodeGenOpt::None && !DisableBranchFold) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); printNoVerify(PM, "After BranchFolding"); } // Tail duplication. - if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { + if (getOptLevel() != CodeGenOpt::None && !DisableTailDuplicate) { PM.add(createTailDuplicatePass(false)); printNoVerify(PM, "After TailDuplicate"); } @@ -490,7 +488,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (PrintGCInfo) PM.add(createGCInfoPrinter(dbgs())); - if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { + if (getOptLevel() != CodeGenOpt::None && !DisableCodePlace) { if (EnableBlockPlacement) { // MachineBlockPlacement is an experimental pass which is disabled by // default currently. Eventually it should subsume CodePlacementOpt, so @@ -509,7 +507,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } } - if (addPreEmitPass(PM, OptLevel)) + if (addPreEmitPass(PM)) printNoVerify(PM, "After PreEmit passes"); return false; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 4c5fe4c..b9d1ef7 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -297,8 +297,22 @@ void MachineBasicBlock::updateTerminator() { TII->RemoveBranch(*this); } else { // The block has an unconditional fallthrough. If its successor is not - // its layout successor, insert a branch. - TBB = *succ_begin(); + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block. + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isLandingPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } + + // If there is no non-landing-pad successor, the block has no + // fall-through edges to be concerned with. + if (!TBB) + return; + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. 
if (!isLayoutSuccessor(TBB)) TII->InsertBranch(*this, TBB, 0, Cond, dl); } diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 304f167..55d804b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -214,11 +214,12 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *selectBestCandidateBlock( BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, const BlockFilterSet *BlockFilter); - MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain, - ArrayRef<MachineBasicBlock *> Blocks, - unsigned &PrevUnplacedBlockIdx); + MachineBasicBlock *getFirstUnplacedBlock( + MachineFunction &F, + const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - ArrayRef<MachineBasicBlock *> Blocks, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter = 0); void buildLoopChains(MachineFunction &F, MachineLoop &L); @@ -314,7 +315,7 @@ void MachineBlockPlacement::markChainSuccessors( // This is a cross-chain edge that is within the loop, so decrement the // loop predecessor count of the destination chain. if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) - BlockWorkList.push_back(*SI); + BlockWorkList.push_back(*SuccChain.begin()); } } } @@ -354,15 +355,45 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); continue; } + if (*SI != *SuccChain.begin()) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n"); + continue; + } uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. - if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); - continue; + if (SuccChain.LoopPredecessors != 0) { + if (SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } + + // Make sure that a hot successor doesn't have a globally more important + // predecessor. + BlockFrequency CandidateEdgeFreq + = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(), + PE = (*SI)->pred_end(); + PI != PE; ++PI) { + if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) || + BlockToChain[*PI] == &Chain) + continue; + BlockFrequency PredEdgeFreq + = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(*SI) + << " -> non-cold CFG conflict\n"); + continue; + } } DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb @@ -444,18 +475,23 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( /// /// This routine is called when we are unable to use the CFG to walk through /// all of the basic blocks and form a chain due to unnatural loops in the CFG. -/// We walk through the sequence of blocks, starting from the -/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire -/// sequence on repeated calls to this routine. 
+/// We walk through the function's blocks in order, starting from the +/// LastUnplacedBlockIt. We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( - const BlockChain &PlacedChain, - ArrayRef<MachineBasicBlock *> Blocks, - unsigned &PrevUnplacedBlockIdx) { - for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) { - MachineBasicBlock *BB = Blocks[i]; - if (BlockToChain[BB] != &PlacedChain) { - PrevUnplacedBlockIdx = i; - return BB; + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(I)) + continue; + if (BlockToChain[I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + // Now select the head of the chain to which the unplaced block belongs + // as the block to place. This will force the entire chain to be placed, + // and satisfies the requirements of merging chains. + return *BlockToChain[I]->begin(); } } return 0; @@ -464,14 +500,12 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, - ArrayRef<MachineBasicBlock *> Blocks, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); assert(BlockToChain[BB] == &Chain); - assert(*Chain.begin() == BB); - SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. - unsigned PrevUnplacedBlockIdx = 0; + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); @@ -482,26 +516,9 @@ void MachineBlockPlacement::buildChain( assert(*llvm::prior(Chain.end()) == BB); MachineBasicBlock *BestSucc = 0; - // Check for unreasonable branches, and forcibly merge the existing layout - // successor for them. We can handle cases that AnalyzeBranch can't: jump - // tables etc are fine. The case we want to handle specially is when there - // is potential fallthrough, but the branch cannot be analyzed. This - // includes blocks without terminators as well as other cases. - Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) { - MachineFunction::iterator I(BB), NextI(llvm::next(I)); - // Ensure that the layout successor is a viable block, as we know that - // fallthrough is a possibility. Note that this may not be a valid block - // in the loop, but we allow that to cope with degenerate situations. - assert(NextI != BB->getParent()->end()); - BestSucc = NextI; - } - - // Otherwise, look for the best viable successor if there is one to place - // immediately after this block. - if (!BestSucc) - BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + // Look for the best viable successor if there is one to place immediately + // after this block. 
+ BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -510,7 +527,8 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx); + BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, + BlockFilter); if (!BestSucc) break; @@ -576,11 +594,10 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, } if (Chain.LoopPredecessors == 0) - BlockWorkList.push_back(*BI); + BlockWorkList.push_back(*Chain.begin()); } - buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList, - &LoopBlockSet); + buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -596,8 +613,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, if (!LoopBlockSet.erase(*BCI)) { // We don't mark the loop as bad here because there are real situations // where this can occur. For example, with an unanalyzable fallthrough - // from a loop block to a non-loop block. - // FIXME: Such constructs shouldn't exist. Track them down and fix them. + // from a loop block to a non-loop block or vice versa. dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" @@ -621,26 +637,43 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Ensure that every BB in the function has an associated chain to simplify // the assumptions of the remaining algorithm. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - BlockToChain[&*FI] = - new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI); + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = FI; + BlockChain *Chain + = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. + for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI(llvm::next(FI)); + MachineBasicBlock *NextBB = NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, 0); + FI = NextFI; + BB = NextBB; + } + } // Build any loop-based chains. for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; ++LI) buildLoopChains(F, **LI); - // We need a vector of blocks so that buildChain can handle unnatural CFG - // constructs by searching for unplaced blocks and just concatenating them. 
- SmallVector<MachineBasicBlock *, 16> Blocks; - Blocks.reserve(F.size()); - SmallVector<MachineBasicBlock *, 16> BlockWorkList; SmallPtrSet<BlockChain *, 4> UpdatedPreds; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; - Blocks.push_back(BB); BlockChain &Chain = *BlockToChain[BB]; if (!UpdatedPreds.insert(&Chain)) continue; @@ -659,11 +692,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } if (Chain.LoopPredecessors == 0) - BlockWorkList.push_back(BB); + BlockWorkList.push_back(*Chain.begin()); } BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList); + buildChain(&F.front(), FunctionChain, BlockWorkList); typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; DEBUG({ @@ -695,7 +728,6 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Splice the blocks into place. MachineFunction::iterator InsertPos = F.begin(); - SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. for (BlockChain::iterator BI = FunctionChain.begin(), BE = FunctionChain.end(); BI != BE; ++BI) { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 20066a0..0c89a57 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -335,7 +335,7 @@ namespace llvm { DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { - return "CFG for '" + F->getFunction()->getNameStr() + "' function"; + return "CFG for '" + F->getFunction()->getName().str() + "' function"; } std::string getNodeLabel(const MachineBasicBlock *Node, @@ -368,7 +368,7 @@ namespace llvm { void MachineFunction::viewCFG() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr()); + ViewGraph(this, "mf" + getFunction()->getName()); #else errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; @@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const void MachineFunction::viewCFGOnly() const { #ifndef NDEBUG - ViewGraph(this, "mf" + getFunction()->getNameStr(), true); + ViewGraph(this, "mf" + getFunction()->getName(), true); #else errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 054c750..35591e1 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -19,9 +19,8 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; -MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm, - CodeGenOpt::Level OL) : - FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) { +MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : + FunctionPass(ID), TM(tm), MF(0) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index e756ded..e5e8c51 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -670,7 +670,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); EVT VT = *RC->vt_begin(); - if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; } else { diff --git a/lib/CodeGen/MachineVerifier.cpp 
b/lib/CodeGen/MachineVerifier.cpp index b3c28b0..f231e3c 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -320,7 +320,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { MF->print(*OS, Indexes); } *OS << "*** Bad machine code: " << msg << " ***\n" - << "- function: " << MF->getFunction()->getNameStr() << "\n"; + << "- function: " << MF->getFunction()->getName() << "\n"; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index c73e877..7205ed6 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -248,8 +248,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { static int bbcnt = 0; if (bbcnt++ % DebugDiv != DebugMod) continue; - dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() << - ":BB#" << MBB->getNumber() << " ***\n"; + dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName() + << ":BB#" << MBB->getNumber() << " ***\n"; } #endif diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b36a445..4664a3c 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -682,7 +682,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI, } SmallVector<unsigned, 8> PartialDefs; - DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -704,15 +704,24 @@ void RAFast::handleThroughOperands(MachineInstr *MI, // That would confuse the later phys-def processing pass. LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); PartialDefs.push_back(LRI->second.PhysReg); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. - LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - if (setPhysReg(MI, i, PhysReg)) - VirtDead.push_back(Reg); } } + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + // Restore UsedInInstr to a state usable for allocating normal virtual uses. UsedInInstr.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -815,7 +824,6 @@ void RAFast::AllocateBasicBlock() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - LiveDbgValueMap[Reg].push_back(MI); LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); @@ -849,6 +857,7 @@ void RAFast::AllocateBasicBlock() { } } } + LiveDbgValueMap[Reg].push_back(MI); } } // Next instruction. diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4b55a22..6304243 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -86,12 +86,12 @@ void ScheduleDAG::viewGraph() { // This code is only for debugging! 
#ifndef NDEBUG if (BB->getBasicBlock()) - ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + - ":" + BB->getBasicBlock()->getNameStr()); + ViewGraph(this, "dag." + MF.getFunction()->getName(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getName() + + ":" + BB->getBasicBlock()->getName()); else - ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, - "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); + ViewGraph(this, "dag." + MF.getFunction()->getName(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getName()); #else errs() << "ScheduleDAG::viewGraph is only available in debug builds on " << "systems with Graphviz or gv!\n"; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4384db8..d8208a4 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22,7 +22,6 @@ #include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" @@ -6937,10 +6936,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { DAG.getConstant(PtrOff, PtrType)); } - return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; + SDValue To[] = { Load.getValue(0), Load.getValue(1) }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes); + // Since we're explcitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorkList(N); + return SDValue(N, 0); } return SDValue(); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e8f8c73..cff37c2 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,6 +39,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "isel" #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" @@ -58,8 +59,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by target-specific selector"); + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. 
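The DAGCombiner hunk above cannot simply return the new load: the original wide load LN0 also feeds its users a chain result, so both SDValue(N, 0) and SDValue(LN0, 1) have to be rewired to the new load's value and chain in one ReplaceAllUsesOfValuesWith call. A toy standalone model of that paired rewiring, where an SDValue is reduced to a (node id, result number) pair and all the ids are invented:

    #include <cstdio>
    #include <map>
    #include <utility>
    #include <vector>

    typedef std::pair<int, int> SDVal; // (node id, result number)

    int main() {
      // Hypothetical uses: the extracted element (node 1, result 0) and
      // the old load's chain output (node 2, result 1).
      std::vector<SDVal> Uses;
      Uses.push_back(SDVal(1, 0));
      Uses.push_back(SDVal(2, 1));
      // From/To pairs, in the spirit of ReplaceAllUsesOfValuesWith: both
      // results are redirected to the new load (node 3) together.
      std::map<SDVal, SDVal> Repl;
      Repl[SDVal(1, 0)] = SDVal(3, 0); // value -> new load's value
      Repl[SDVal(2, 1)] = SDVal(3, 1); // chain -> new load's chain
      for (size_t i = 0; i < Uses.size(); ++i) {
        std::map<SDVal, SDVal>::const_iterator It = Repl.find(Uses[i]);
        if (It != Repl.end())
          Uses[i] = It->second;
        std::printf("(%d,%d) ", Uses[i].first, Uses[i].second);
      }
      std::printf("\n"); // prints (3,0) (3,1)
      return 0;
    }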
/// @@ -96,6 +101,11 @@ bool FastISel::hasTrivialKill(const Value *V) const { !hasTrivialKill(Cast->getOperand(0))) return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + // Only instructions with a single use in the same basic block are considered // to have trivial kills. return I->hasOneUse() && @@ -427,6 +437,11 @@ bool FastISel::SelectGetElementPtr(const User *I) { bool NIsKill = hasTrivialKill(I->getOperand(0)); + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, @@ -436,14 +451,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset - uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field); - // FIXME: This can be optimized by combining the add with a - // subsequent one. - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - NIsKill = true; + TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } } Ty = StTy->getElementType(Field); } else { @@ -452,14 +468,26 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; - uint64_t Offs = + // N = N + Offset + TotalOffs += TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); + if (TotalOffs >= MaxOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + continue; + } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); if (N == 0) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; - continue; + TotalOffs = 0; } // N = N + Idx * ElementSize; @@ -484,6 +512,12 @@ bool FastISel::SelectGetElementPtr(const User *I) { return false; } } + if (TotalOffs) { + N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (N == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + } // We successfully emitted code for the given LLVM Instruction. UpdateValueMap(I, N); @@ -760,12 +794,14 @@ FastISel::SelectInstruction(const Instruction *I) { // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; } // Next, try calling the target to attempt to handle the instruction. 
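The SelectGetElementPtr rewrite above folds a run of constant GEP offsets into one running TotalOffs, emitting an add only when the sum crosses MaxOffs or at the very end, instead of one add per index. A standalone sketch of that coalescing with invented offsets; MaxOffs matches the patch's threshold, and AddsEmitted is just a counter for the demo:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      // Hypothetical constant byte offsets from successive GEP indices.
      std::vector<uint64_t> Offsets = {8, 16, 1024, 2048, 4};
      const uint64_t MaxOffs = 2048; // same SWAG threshold as the patch
      uint64_t TotalOffs = 0;
      unsigned AddsEmitted = 0;
      for (size_t i = 0; i < Offsets.size(); ++i) {
        TotalOffs += Offsets[i];
        if (TotalOffs >= MaxOffs) { // flush: N = N + TotalOffs
          ++AddsEmitted;
          TotalOffs = 0;
        }
      }
      if (TotalOffs) // final flush for any leftover offset
        ++AddsEmitted;
      std::printf("emitted %u adds instead of %zu\n", AddsEmitted,
                  Offsets.size());
      return 0;
    }

Two adds instead of five here; in the FastISel code each flush corresponds to one FastEmit_ri_ ADD.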
if (TargetSelectInstruction(I)) { + ++NumFastIselSuccessTarget; DL = DebugLoc(); return true; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0bca55f..156cc70 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -293,6 +293,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -413,6 +415,8 @@ static void ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI, SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f4164b2..fd24238 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -20,7 +20,6 @@ #include "LegalizeTypes.h" #include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -2927,38 +2926,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - assert(Op0.getValueType() == Op1.getValueType() && - "Invalid input vector types"); - EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); - unsigned NumElem0 = Op0.getValueType().getVectorNumElements(); - unsigned NumElem1 = Op1.getValueType().getVectorNumElements(); + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); unsigned NumOutElem = NOutVT.getVectorNumElements(); - assert(NumElem0 + NumElem1 == NumOutElem && - "Invalid number of incoming elements"); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); // Take the elements from the first vector. 
SmallVector<SDValue, 8> Ops(NumOutElem); - for (unsigned i = 0; i < NumElem0; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op0.getValueType().getScalarType(), Op0, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); - } - - // Take the elements from the second vector - for (unsigned i = 0; i < NumElem1; ++i) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - Op1.getValueType().getScalarType(), Op1, - DAG.getIntPtrConstant(i)); - Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InElemTy, Op, DAG.getIntPtrConstant(j)); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } } return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 84d334a..7c5472b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -21,7 +21,6 @@ #include "LegalizeTypes.h" #include "llvm/Target/TargetData.h" -#include "llvm/CodeGen/PseudoSourceValue.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index cb5df05..ad83565 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,6 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 7938a37..cd0da37 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -267,7 +267,7 @@ private: /// GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, -/// but for untyped values (MVT::untyped) it means inspecting the node's +/// but for untyped values (MVT::Untyped) it means inspecting the node's /// opcode to determine what register class is being generated. static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetLowering *TLI, @@ -278,7 +278,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. 
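PromoteIntRes_CONCAT_VECTORS above now handles any number of operands by placing element j of operand i at output position i * NumElem + j, rather than special-casing exactly two inputs. The same index arithmetic over plain vectors, as a self-contained check (the extract and any-extend steps are elided):

    #include <cstdio>
    #include <vector>

    int main() {
      // Three hypothetical input vectors of two elements each.
      std::vector<std::vector<int> > Operands = {{1, 2}, {3, 4}, {5, 6}};
      const size_t NumElem = Operands[0].size();
      std::vector<int> Ops(Operands.size() * NumElem);
      for (size_t i = 0; i < Operands.size(); ++i)
        for (size_t j = 0; j < NumElem; ++j)
          Ops[i * NumElem + j] = Operands[i][j]; // extract + extend elided
      for (size_t k = 0; k < Ops.size(); ++k)
        std::printf("%d ", Ops[k]); // prints 1 2 3 4 5 6
      std::printf("\n");
      return 0;
    }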
- if (VT == MVT::untyped) { + if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); unsigned Opcode = Node->getMachineOpcode(); @@ -948,6 +948,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return NULL; + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return NULL; + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38dd7cc..497c286 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 18c29b8..8d02350 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -41,7 +41,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetData.h" @@ -1811,8 +1810,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); // Update successor info - InvokeMBB->addSuccessor(Return); - InvokeMBB->addSuccessor(LandingPad); + addSuccessorWithWeight(InvokeMBB, Return); + addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5cbce3f..8cecc17 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -476,8 +476,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #endif { BlockNumber = FuncInfo->MBB->getNumber(); - BlockName = MF->getFunction()->getNameStr() + ":" + - FuncInfo->MBB->getBasicBlock()->getNameStr(); + BlockName = MF->getFunction()->getName().str() + ":" + + FuncInfo->MBB->getBasicBlock()->getName().str(); } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); @@ -892,13 +892,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->setLastLocalValue(0); } + unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; continue; + } // Bottom-up: reset the insert pos at the top, after any local-value // instructions. @@ -906,6 +909,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. 
if (FastIS->SelectInstruction(Inst)) { + --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and // then see if there is a load right before the selected instructions. @@ -918,15 +922,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && BeforeInst->hasOneUse() && - TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) + TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) { // If we succeeded, don't re-select the load. BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } continue; } // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(Inst)) { - ++NumFastIselFailures; + if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; Inst->dump(); @@ -941,24 +948,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; SelectBasicBlock(Inst, BI, HadTailCall); + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. + unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; + // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { --BI; break; } + NumFastIselRemaining = RemainingNow; continue; } if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) { // Don't abort, and use a different message for terminator misses. - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed terminator: "; Inst->dump(); } } else { - ++NumFastIselFailures; + NumFastIselFailures += NumFastIselRemaining; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; Inst->dump(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cd1647b..a7cf089 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -148,7 +147,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, void SelectionDAG::viewGraph(const std::string &Title) { // This code is only for debugging! #ifndef NDEBUG - ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), + ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false, Title); #else errs() << "SelectionDAG::viewGraph is only available in debug builds on " diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 160f38f..13f269e 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -158,7 +158,7 @@ void PEI::initShrinkWrappingInfo() { // via --shrink-wrap-func=<funcname>. 
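The SelectionDAGISel accounting above seeds NumFastIselRemaining with std::distance(Begin, End), decrements it for each instruction fast-isel handles, and resynchronizes it with std::distance after the SelectionDAG path consumes a call plus an unknown number of feeding instructions; the difference is what gets charged as misses. A toy model of that bookkeeping over a list of instruction ids, everything invented:

    #include <cstdio>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> Insts = {1, 2, 3, 4, 5, 6};
      std::list<int>::iterator Begin = Insts.begin();
      std::list<int>::iterator BI = Insts.end();
      unsigned NumFastIselRemaining = std::distance(Begin, BI);
      // Pretend the DAG selector handled a call and everything feeding
      // it, leaving BI two instructions past Begin.
      BI = std::next(Begin, 2);
      unsigned RemainingNow = std::distance(Begin, BI);
      unsigned Failures = NumFastIselRemaining - RemainingNow;
      NumFastIselRemaining = RemainingNow;
      std::printf("misses charged: %u, remaining: %u\n", Failures,
                  NumFastIselRemaining);
      return 0;
    }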
#ifndef NDEBUG if (ShrinkWrapFunc != "") { - std::string MFName = MF->getFunction()->getNameStr(); + std::string MFName = MF->getFunction()->getName().str(); ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); } #endif @@ -1045,7 +1045,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { return ""; if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getNameStr(); + return MBB->getBasicBlock()->getName().str(); std::ostringstream name; name << "_MBB_" << MBB->getNumber(); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index ac88441..c865192 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -48,10 +49,10 @@ namespace { Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; Constant *StackAddrFn; + Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; Constant *CallSiteFn; - Constant *DispatchSetupFn; Constant *FuncCtxFn; Value *CallSite; public: @@ -107,11 +108,10 @@ bool SjLjEHPass::doInitialization(Module &M) { (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); - DispatchSetupFn - = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); PersonalityFn = 0; @@ -365,13 +365,13 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { SmallVector<ReturnInst*, 16> Returns; SmallVector<InvokeInst*, 16> Invokes; - SmallVector<LandingPadInst*, 16> LPads; + SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Invokes.push_back(II); - LPads.push_back(II->getUnwindDest()->getLandingPadInst()); + LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { Returns.push_back(RI); } @@ -383,7 +383,8 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); - Value *FuncCtx = setupFunctionContext(F, LPads); + Value *FuncCtx = + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); Type *Int32Ty = Type::getInt32Ty(F.getContext()); @@ -460,6 +461,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { EntryBB->getTerminator()); Register->setDoesNotThrow(); + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. 
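The SjLjEHPass change above swaps the LPads SmallVector for a SmallSetVector because several invokes can share one landing pad, and setupFunctionContext should see each pad exactly once, in a stable order. A stand-in built from std::vector plus std::set showing the same deduplicate-while-keeping-order behaviour:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // Unwind destinations of five hypothetical invokes; pads repeat.
      std::string Unwind[] = {"lp1", "lp2", "lp1", "lp3", "lp2"};
      std::vector<std::string> LPads;      // unique, in insertion order
      std::set<std::string> Seen;
      for (size_t i = 0; i < 5; ++i)
        if (Seen.insert(Unwind[i]).second) // false on a repeat
          LPads.push_back(Unwind[i]);
      std::printf("%zu unique landing pads\n", LPads.size()); // prints 3
      return 0;
    }

SetVector is essentially this pair of containers packaged behind one interface.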
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa<AllocaInst>(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned I = 0, E = Returns.size(); I != E; ++I) diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 1f0e5a2..43a6ad8 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { - // We apparently only care about character arrays. - if (!AT->getElementType()->isIntegerTy(8)) - continue; - + if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) // If an array has more than SSPBufferSize bytes of allocated space, // then we emit stack protectors. if (SSPBufferSize <= TD->getTypeAllocSize(AT)) return true; - } } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index f32678f..08e2b16 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -296,8 +296,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flags, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3848f4d..c43e5b6 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -358,9 +358,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. - report_fatal_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); // Fall back to dropping it into the data section. return DataSection; } @@ -379,9 +379,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, // to reject it here. if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { // If invalid, report the error with report_fatal_error. 
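The StackProtector hunk above drops the requirement that a protected array be a char (i8) array; afterwards, any alloca'd array whose allocated size reaches SSPBufferSize triggers a protector. A tiny model of the relaxed predicate, where ArrayTy and needsProtector are invented stand-ins and AllocSize plays the role of TD->getTypeAllocSize(AT):

    #include <cstdio>

    struct ArrayTy {
      unsigned ElemBytes;
      unsigned NumElems;
    };

    static bool needsProtector(const ArrayTy &AT, unsigned SSPBufferSize) {
      unsigned AllocSize = AT.ElemBytes * AT.NumElems;
      return SSPBufferSize <= AllocSize; // element type no longer checked
    }

    int main() {
      ArrayTy IntArray = {4, 4}; // sixteen bytes of i32: now protected
      std::printf("protect: %d\n", needsProtector(IntArray, 8));
      return 0;
    }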
- report_fatal_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); } return S; diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 6796312..a2e8134 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -68,6 +68,7 @@ namespace { MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; // DistanceMap - Keep track the distance of a MI from the start of the // current basic block. @@ -571,6 +572,9 @@ bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + // Determine if it's profitable to commute this two address instruction. In // general, we want no uses between this instruction and the definition of // the two-address register. @@ -924,7 +928,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, if (isTwoAddrUse(*KillMI, Reg, DstReg)) return false; - bool SeenStore; + bool SeenStore = true; if (!MI->isSafeToMove(TII, AA, SeenStore)) return false; @@ -933,6 +937,7 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, return false; SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; SmallSet<unsigned, 2> Defs; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -943,8 +948,11 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, continue; if (MO.isDef()) Defs.insert(MOReg); - else + else { Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } } // Move the copies connected to MI down as well. @@ -991,7 +999,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, } else { if (Defs.count(MOReg)) return false; - if (MOReg != Reg && MO.isKill() && Uses.count(MOReg)) + if (MOReg != Reg && + ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) // Don't want to extend other live ranges and update kills. return false; } @@ -1071,7 +1080,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, if (isTwoAddrUse(*KillMI, Reg, DstReg)) return false; - bool SeenStore; + bool SeenStore = true; if (!KillMI->isSafeToMove(TII, AA, SeenStore)) return false; @@ -1115,6 +1124,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, MCID.isTerminator()) // Don't move pass calls, etc. return false; + SmallVector<unsigned, 2> OtherDefs; for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OtherMI->getOperand(i); if (!MO.isReg()) @@ -1131,15 +1141,20 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, // Don't want to extend other live ranges and update kills. return false; } else { - if (Uses.count(MOReg)) - return false; - if (TargetRegisterInfo::isPhysicalRegister(MOReg) && - LiveDefs.count(MOReg)) - return false; - // Physical register def is seen. 
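Note that `bool SeenStore = true;` above fixes a read of an uninitialized flag: isSafeToMove treats SawStore as in/out state, consulting it before deciding whether a load may move, so seeding it with the conservative "a store was seen" answer is the safe default. A standalone stand-in for that contract (isSafeToMove here is a simplified fake, not the MachineInstr API):

    #include <cstdio>

    // Simplified fake of MachineInstr::isSafeToMove: SawStore is in/out;
    // a load is only movable if no store has been observed so far.
    static bool isSafeToMove(bool MayLoad, bool MayStore, bool &SawStore) {
      if (MayStore) {
        SawStore = true;
        return false;
      }
      if (MayLoad && SawStore)
        return false;
      return true;
    }

    int main() {
      bool SeenStore = true; // conservative seed, as in the patch
      std::printf("safe: %d\n",
                  isSafeToMove(/*MayLoad=*/true, /*MayStore=*/false,
                               SeenStore)); // safe: 0
      return 0;
    }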
- Defs.erase(MOReg); + OtherDefs.push_back(MOReg); } } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } } // Move the old kill above MI, don't forget to move debug info as well. @@ -1152,7 +1167,7 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, --From; MBB->splice(InsertPos, MBB, From, To); - nmi = llvm::prior(mi); // Backtrack so we process the moved instruction. + nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr. DistanceMap.erase(DI); if (LV) { @@ -1182,6 +1197,9 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet<MachineInstr*, 8> &Processed) { + if (OptLevel == CodeGenOpt::None) + return false; + MachineInstr &MI = *mi; const MCInstrDesc &MCID = MI.getDesc(); unsigned regA = MI.getOperand(DstIdx).getReg(); @@ -1377,6 +1395,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable<LiveVariables>(); AA = &getAnalysis<AliasAnalysis>(); + OptLevel = TM.getOptLevel(); bool MadeChange = false; diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 055875c..1bcdbe2 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -94,15 +94,16 @@ static ExFunc lookupFunction(const Function *F) { FunctionType *FT = F->getFunctionType(); for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) ExtName += getTypeID(FT->getContainedType(i)); - ExtName + "_" + F->getNameStr(); + ExtName += "_" + F->getName().str(); sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = FuncNames[ExtName]; if (FnPtr == 0) - FnPtr = FuncNames["lle_X_" + F->getNameStr()]; + FnPtr = FuncNames["lle_X_" + F->getName().str()]; if (FnPtr == 0) // Try calling a generic function... if it exists... FnPtr = (ExFunc)(intptr_t) - sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr()); + sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + + F->getName().str()); if (FnPtr != 0) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index e71c20b..2e90968 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -118,7 +118,7 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) { if (JITEmitDebugInfoToDisk) { std::string Filename; raw_string_ostream O2(Filename); - O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o"; + O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getName() << ".o"; O2.flush(); std::string Errors; raw_fd_ostream O3(Filename.c_str(), Errors); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 7c8a740..d5f407d 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -64,7 +64,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory // that may be executed. 
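The lookupFunction fix above is worth spelling out: the old line `ExtName + "_" + F->getNameStr();` was an expression statement whose result was discarded, so the suffix never landed in ExtName; `+=` performs the intended append. A two-line reproduction of the bug with plain std::string:

    #include <cstdio>
    #include <string>

    int main() {
      std::string ExtName = "lle_ii";
      std::string Name = "printf";
      ExtName + "_" + Name;  // no effect: temporary discarded (old bug)
      ExtName += "_" + Name; // appends as intended (the fix)
      std::printf("%s\n", ExtName.c_str()); // lle_ii_printf
      return 0;
    }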
- if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) { + if (TM->addPassesToEmitMC(PM, Ctx, OS, false)) { report_fatal_error("Target does not support MC emission!"); } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index eee002a..bd28069 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1579,8 +1579,8 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } -void -PPCELFObjectWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { +void PPCELFObjectWriter:: +adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: @@ -1825,6 +1825,12 @@ MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW, MipsELFObjectWriter::~MipsELFObjectWriter() {} +// FIXME: get the real EABI Version from the Triple. +void MipsELFObjectWriter::WriteEFlags() { + Write32(ELF::EF_MIPS_NOREORDER | + ELF::EF_MIPS_ARCH_32R2); +} + unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, @@ -1840,6 +1846,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_4: Type = ELF::R_MIPS_32; break; + case FK_GPRel_4: + Type = ELF::R_MIPS_GPREL32; + break; case Mips::fixup_Mips_GPREL16: Type = ELF::R_MIPS_GPREL16; break; diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h index 862b085..7838206 100644 --- a/lib/MC/ELFObjectWriter.h +++ b/lib/MC/ELFObjectWriter.h @@ -240,33 +240,38 @@ class ELFObjectWriter : public MCObjectWriter { F.getContents() += StringRef(buf, 8); } - virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); + virtual void WriteHeader(uint64_t SectionDataSize, + unsigned NumberOfSections); /// Default e_flags = 0 virtual void WriteEFlags() { Write32(0); } - virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - uint64_t name, uint8_t info, - uint64_t value, uint64_t size, - uint8_t other, uint32_t shndx, - bool Reserved); + virtual void WriteSymbolEntry(MCDataFragment *SymtabF, + MCDataFragment *ShndxF, + uint64_t name, uint8_t info, + uint64_t value, uint64_t size, + uint8_t other, uint32_t shndx, + bool Reserved); virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF, ELFSymbolData &MSD, const MCAsmLayout &Layout); typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy; - virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap); - - virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, + virtual void WriteSymbolTable(MCDataFragment *SymtabF, + MCDataFragment *ShndxF, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap); + + virtual void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue); virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, - const MCSymbol *S); + const MCSymbol *S); // Map from a group section to the signature symbol typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy; @@ -347,7 +352,8 @@ class ELFObjectWriter : public MCObjectWriter { virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool 
IsRelocWithSymbol, int64_t Addend) = 0; - virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { } + virtual void adjustFixupOffset(const MCFixup &Fixup, + uint64_t &RelocOffset) {} }; //===- X86ELFObjectWriter -------------------------------------------===// @@ -436,6 +442,8 @@ class ELFObjectWriter : public MCObjectWriter { bool IsLittleEndian); virtual ~MipsELFObjectWriter(); + virtual void WriteEFlags(); + protected: virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel, bool IsRelocWithSymbol, diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 2c150f4..936ed55 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -21,14 +21,18 @@ MCAsmBackend::~MCAsmBackend() { const MCFixupKindInfo & MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { - { "FK_Data_1", 0, 8, 0 }, - { "FK_Data_2", 0, 16, 0 }, - { "FK_Data_4", 0, 32, 0 }, - { "FK_Data_8", 0, 64, 0 }, - { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_Data_1", 0, 8, 0 }, + { "FK_Data_2", 0, 16, 0 }, + { "FK_Data_4", 0, 32, 0 }, + { "FK_Data_8", 0, 64, 0 }, + { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel } + { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_GPRel_1", 0, 8, 0 }, + { "FK_GPRel_2", 0, 16, 0 }, + { "FK_GPRel_4", 0, 32, 0 }, + { "FK_GPRel_8", 0, 64, 0 } }; assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) && diff --git a/lib/MC/MCCodeGenInfo.cpp b/lib/MC/MCCodeGenInfo.cpp index 236e7de..d9dcfd0 100644 --- a/lib/MC/MCCodeGenInfo.cpp +++ b/lib/MC/MCCodeGenInfo.cpp @@ -15,7 +15,9 @@ #include "llvm/MC/MCCodeGenInfo.h" using namespace llvm; -void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM) { +void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) { RelocationModel = RM; CMModel = CM; + OptLevel = OL; } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 0b366da..0ea3c64 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -205,10 +205,10 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, if (MCELF::GetBinding(SD) == ELF_STB_Local) { const MCSection *Section = getAssembler().getContext().getELFSection(".bss", - ELF::SHT_NOBITS, - ELF::SHF_WRITE | - ELF::SHF_ALLOC, - SectionKind::getBSS()); + ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getBSS()); Symbol->setSection(*Section); struct LocalCommon L = {&SD, Size, ByteAlignment}; diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 4af27ab..da297fb 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -26,38 +26,6 @@ STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); } } -static bool printMipsSymbolRef(const MCSymbolRefExpr &SRE, - const MCSymbol &Sym, raw_ostream &OS) { - MCSymbolRefExpr::VariantKind Kind= SRE.getKind(); - - switch (Kind) { - default: - return false; - case MCSymbolRefExpr::VK_Mips_None: break; - case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; - case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; - case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; - case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; - case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; 
break; - case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; - case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; - case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; - case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; - case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; - case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; - case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; - } - - OS << Sym; - - if (Kind != MCSymbolRefExpr::VK_Mips_None) - OS << ')'; - - return true; -} - void MCExpr::print(raw_ostream &OS) const { switch (getKind()) { case MCExpr::Target: @@ -73,9 +41,6 @@ void MCExpr::print(raw_ostream &OS) const { // absolute names. bool UseParens = Sym.getName()[0] == '$'; - if (printMipsSymbolRef(SRE, Sym, OS)) - return; - if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 || SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) { OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp index b1d09d9..f563160 100644 --- a/lib/MC/MCModule.cpp +++ b/lib/MC/MCModule.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/MCModule.cpp - MCModule implementation --------------------------===// +//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index d76e48b..7d23541 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -56,8 +56,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { TLSThreadInitSection = Ctx->getMachOSection("__DATA", "__thread_init", - MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, - SectionKind::getDataRel()); + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, + SectionKind::getDataRel()); CStringSection // .cstring = Ctx->getMachOSection("__TEXT", "__cstring", @@ -221,8 +221,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { if (T.getArch() == Triple::x86) { PersonalityEncoding = (RelocM == Reloc::PIC_) - ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 - : dwarf::DW_EH_PE_absptr; + ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; LSDAEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; @@ -230,8 +230,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; TTypeEncoding = (RelocM == Reloc::PIC_) - ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 - : dwarf::DW_EH_PE_absptr; + ? 
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; } else if (T.getArch() == Triple::x86_64) { FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index a04ae08..90c957f 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -245,6 +245,16 @@ void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, EmitFill(Res, Value, 0); } +// Associate GPRel32 fixup with data and resize data area +void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) { + MCDataFragment *DF = getOrCreateDataFragment(); + + DF->addFixup(MCFixup::Create(DF->getContents().size(), + Value, + FK_GPRel_4)); + DF->getContents().resize(DF->getContents().size() + 4, 0); +} + void MCObjectStreamer::Finish() { // Dump out the dwarf file & directory tables and line tables. if (getContext().hasDwarfFiles()) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 2abfb44..c6ce562 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -226,7 +226,9 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) { Result = 'w'; return object_error::success; // Don't do ::toupper. - } else + } else if (symb->Value != 0) // Check for common symbols. + ret = 'c'; + else ret = 'u'; break; case COFF::IMAGE_SYM_ABSOLUTE: diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 3774c52..55cb433 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -870,30 +870,43 @@ unsigned APInt::countPopulationSlowCase() const { return Count; } +/// Perform a logical right-shift from Src to Dst, which must be equal or +/// non-overlapping, of Words words, by Shift, which must be less than 64. 
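EmitGPRel32Value above follows the usual MC pattern for data fixups: record a fixup whose offset is the fragment's current size, then grow the fragment by four zero bytes for the relocation to patch later. The same shape over plain containers; the Fixup struct and the FK_GPRel_4 constant here are illustrative stand-ins, not the MC types:

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Fixup {
      size_t Offset;
      int Kind;
    };
    const int FK_GPRel_4 = 11; // illustrative kind id only

    int main() {
      std::string Contents = "\x01\x02"; // existing fragment data
      std::vector<Fixup> Fixups;
      Fixups.push_back({Contents.size(), FK_GPRel_4});
      Contents.resize(Contents.size() + 4, 0); // placeholder word
      std::printf("fixup at offset %zu, fragment now %zu bytes\n",
                  Fixups[0].Offset, Contents.size());
      return 0;
    }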
+static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words, + unsigned Shift) { + uint64_t Carry = 0; + for (int I = Words - 1; I >= 0; --I) { + uint64_t Tmp = Src[I]; + Dst[I] = (Tmp >> Shift) | Carry; + Carry = Tmp << (64 - Shift); + } +} + APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); - else if (BitWidth == 32) + if (BitWidth == 32) return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); - else if (BitWidth == 48) { + if (BitWidth == 48) { unsigned Tmp1 = unsigned(VAL >> 16); Tmp1 = ByteSwap_32(Tmp1); uint16_t Tmp2 = uint16_t(VAL); Tmp2 = ByteSwap_16(Tmp2); return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); - } else if (BitWidth == 64) + } + if (BitWidth == 64) return APInt(BitWidth, ByteSwap_64(VAL)); - else { - APInt Result(BitWidth, 0); - char *pByte = (char*)Result.pVal; - for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) { - char Tmp = pByte[i]; - pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i]; - pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp; - } - return Result; + + APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); + for (unsigned I = 0, N = getNumWords(); I != N; ++I) + Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]); + if (Result.BitWidth != BitWidth) { + lshrNear(Result.pVal, Result.pVal, getNumWords(), + Result.BitWidth - BitWidth); + Result.BitWidth = BitWidth; } + return Result; } APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, @@ -1232,11 +1245,7 @@ APInt APInt::lshr(unsigned shiftAmt) const { // If we are shifting less than a word, compute the shift with a simple carry if (shiftAmt < APINT_BITS_PER_WORD) { - uint64_t carry = 0; - for (int i = getNumWords()-1; i >= 0; --i) { - val[i] = (pVal[i] >> shiftAmt) | carry; - carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt); - } + lshrNear(val, pVal, getNumWords(), shiftAmt); return APInt(val, BitWidth).clearUnusedBits(); } diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp index c29cb53..5743479 100644 --- a/lib/Support/ConstantRange.cpp +++ b/lib/Support/ConstantRange.cpp @@ -466,10 +466,8 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { /// correspond to the possible range of values as if the source range had been /// truncated to the specified type. 
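The new lshrNear helper above, shared by byteSwap and lshr, is small enough to exercise standalone: a logical right shift across an array of 64-bit words, least-significant word first, with the bits that fall out of each word carried down into the next. Shift must be in [1, 63], since `64 - Shift` would otherwise be an undefined shift amount:

    #include <cstdint>
    #include <cstdio>

    static void lshrNear(uint64_t *Dst, const uint64_t *Src, unsigned Words,
                         unsigned Shift) {
      uint64_t Carry = 0;
      for (int I = Words - 1; I >= 0; --I) {
        uint64_t Tmp = Src[I];
        Dst[I] = (Tmp >> Shift) | Carry; // bits arriving from word I+1
        Carry = Tmp << (64 - Shift);     // bits handed down to word I-1
      }
    }

    int main() {
      uint64_t V[2] = {0x1, 0x1}; // the 128-bit value 2^64 + 1
      lshrNear(V, V, 2, 1);       // shift right by one
      std::printf("%016llx %016llx\n", (unsigned long long)V[1],
                  (unsigned long long)V[0]);
      // prints 0000000000000000 8000000000000000, i.e. 2^63
      return 0;
    }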
ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { - unsigned SrcTySize = getBitWidth(); - assert(SrcTySize > DstTySize && "Not a value truncation"); - APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize)); - if (isFullSet() || getSetSize().ugt(Size)) + assert(getBitWidth() > DstTySize && "Not a value truncation"); + if (isFullSet() || getSetSize().getActiveBits() > DstTySize) return ConstantRange(DstTySize, /*isFullSet=*/true); return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize)); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index cc3f6a8..1a40972 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Config/config.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Errno.h" #include "llvm/Support/Path.h" @@ -320,23 +321,24 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, char *BufPtr = const_cast<char*>(SB->getBufferStart()); size_t BytesLeft = MapSize; +#ifndef HAVE_PREAD if (lseek(FD, Offset, SEEK_SET) == -1) return error_code(errno, posix_category()); +#endif while (BytesLeft) { +#ifdef HAVE_PREAD + ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset); +#else ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); +#endif if (NumRead == -1) { if (errno == EINTR) continue; // Error while reading. return error_code(errno, posix_category()); - } else if (NumRead == 0) { - // We hit EOF early, truncate and terminate buffer. - Buf->BufferEnd = BufPtr; - *BufPtr = 0; - result.swap(SB); - return success; } + assert(NumRead != 0 && "fstat reported an invalid file size."); BytesLeft -= NumRead; BufPtr += NumRead; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7a7267a..9315348 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/BranchProbability.h" @@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); static cl::opt<bool> -WidenVMOVS("widen-vmovs", cl::Hidden, +WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true), cl::desc("Widen ARM vmovs to vmovd when possible")); /// ARM_MLxEntry - Record information about MLA / MLS instructions. 
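The MemoryBuffer change above prefers pread when available: the file offset is an argument, so the lseek disappears and the descriptor's position is never mutated, which also keeps concurrent readers of the same descriptor safe. A POSIX-only sketch of such a read loop, using /etc/hosts purely as an example path and trimming error handling down to the EINTR retry:

    #include <cerrno>
    #include <cstdio>
    #include <fcntl.h>
    #include <unistd.h>

    int main() {
      int FD = open("/etc/hosts", O_RDONLY); // example path only
      if (FD < 0)
        return 1;
      char Buf[64];
      size_t BytesLeft = sizeof(Buf);
      size_t Off = 0;
      while (BytesLeft) {
        // pread takes the offset explicitly: no lseek, no shared cursor.
        ssize_t NumRead = pread(FD, Buf + Off, BytesLeft, (off_t)Off);
        if (NumRead == -1) {
          if (errno == EINTR)
            continue; // retry interrupted reads, as the patch does
          return 1;
        }
        if (NumRead == 0)
          break; // EOF; the patch asserts here instead, trusting fstat
        BytesLeft -= (size_t)NumRead;
        Off += (size_t)NumRead;
      }
      std::printf("read %zu bytes\n", Off);
      close(FD);
      return 0;
    }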
@@ -710,8 +709,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo( - PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); @@ -862,7 +860,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index fb7d96a..fc464ea 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -824,7 +824,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } - case ARM::Int_eh_sjlj_dispatchsetup: { + case ARM::eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4df084f..9bae422 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -37,7 +37,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -197,8 +196,6 @@ class ARMFastISel : public FastISel { // Call handling routines. private: - bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, - unsigned &ResultReg); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return); bool ProcessCallArgs(SmallVectorImpl<Value*> &Args, SmallVectorImpl<unsigned> &ArgRegs, @@ -687,6 +684,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { return 0; } +// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF); + unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -1115,7 +1114,7 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { // Create the base instruction, then add the operands. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) - .addReg(SrcReg, getKillRegState(true)); + .addReg(SrcReg); AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); return true; } @@ -1304,6 +1303,8 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, int Imm = 0; bool UseImm = false; bool isNegativeImm = false; + // FIXME: At -O0 we don't have anything that canonicalizes operand order. + // Thus, Src1Value may be a ConstantInt, but we're missing it. 
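In the SelectBinaryOp and SelectCall hunks below, the getRegForValue calls move after the cheap legality checks: getRegForValue can emit instructions as a side effect, so bailing out first avoids leaving dead materialization code behind at -O0. A toy model of that ordering, where a counter stands in for emitted instructions and every name is invented:

    #include <cstdio>

    static unsigned InstsEmitted = 0;

    // Fake of FastISel's getRegForValue: materializing a value may emit
    // instructions even if the caller later gives up on the operation.
    static int getRegForValue(int V) {
      ++InstsEmitted;
      return V;
    }

    static bool selectBinaryOp(int LHS, int RHS, bool Supported) {
      if (!Supported) // run the cheap checks first (the patch's order)
        return false;
      int Op1 = getRegForValue(LHS);
      int Op2 = getRegForValue(RHS);
      (void)Op1;
      (void)Op2;
      return true;
    }

    int main() {
      selectBinaryOp(1, 2, /*Supported=*/false);
      std::printf("emitted %u instructions (old order: 2)\n", InstsEmitted);
      return 0;
    }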
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { @@ -1669,12 +1670,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { if (isFloat && !Subtarget->hasVFP2()) return false; - unsigned Op1 = getRegForValue(I->getOperand(0)); - if (Op1 == 0) return false; - - unsigned Op2 = getRegForValue(I->getOperand(1)); - if (Op2 == 0) return false; - unsigned Opc; bool is64bit = VT == MVT::f64 || VT == MVT::i64; switch (ISDOpcode) { @@ -1689,6 +1684,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { Opc = is64bit ? ARM::VMULD : ARM::VMULS; break; } + unsigned Op1 = getRegForValue(I->getOperand(0)); + if (Op1 == 0) return false; + + unsigned Op2 = getRegForValue(I->getOperand(1)); + if (Op2 == 0) return false; + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) @@ -1699,18 +1700,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { // Call Handling Code -bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, - EVT SrcVT, unsigned &ResultReg) { - unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, - Src, /*TODO: Kill=*/false); - - if (RR != 0) { - ResultReg = RR; - return true; - } else - return false; -} - // This is largely taken directly from CCAssignFnForNode - we don't support // varargs in FastISel so that part has been removed. // TODO: We may not support all of this. @@ -2119,9 +2108,6 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (IntrMemName && e-i <= 2) break; - unsigned Arg = getRegForValue(*i); - if (Arg == 0) - return false; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; if (CS.paramHasAttr(AttrInd, Attribute::SExt)) @@ -2141,6 +2127,11 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 && ArgVT != MVT::i1) return false; + + unsigned Arg = getRegForValue(*i); + if (Arg == 0) + return false; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b55ef70..8c4c06f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -40,7 +40,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" @@ -687,7 +686,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetDarwin()) { setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); - setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); } @@ -864,7 +862,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; - case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; @@ -912,6 +909,7 @@ const char 
*ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; + case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; case ARMISD::VDUP: return "ARMISD::VDUP"; case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; case ARMISD::VEXT: return "ARMISD::VEXT"; @@ -2212,14 +2210,6 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, } SDValue -ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) - const { - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); -} - -SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Val = DAG.getConstant(0, MVT::i32); @@ -3986,6 +3976,16 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); } + + // Use vmov.f32 to materialize other v2f32 and v4f32 splats. + if (VT == MVT::v2f32 || VT == MVT::v4f32) { + ConstantFPSDNode *C = cast<ConstantFPSDNode>(Op.getOperand(0)); + int ImmVal = ARM_AM::getFP32Imm(C->getValueAPF()); + if (ImmVal != -1) { + SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); + return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); + } + } } } @@ -5014,7 +5014,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); - case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); @@ -5556,52 +5555,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } -/// EmitBasePointerRecalculation - For functions using a base pointer, we -/// rematerialize it (via the frame pointer). -void ARMTargetLowering:: -EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB, - MachineBasicBlock *DispatchBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); - MachineFunction &MF = *MI->getParent()->getParent(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - - if (!RI.hasBasePointer(MF)) return; - - MachineBasicBlock::iterator MBBI = MI; - - int32_t NumBytes = AFI->getFramePtrSpillOffset(); - unsigned FramePtr = RI.getFrameRegister(MF); - assert(MF.getTarget().getFrameLowering()->hasFP(MF) && - "Base pointer without frame pointer?"); - - if (AFI->isThumb2Function()) - llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *AII); - else if (AFI->isThumbFunction()) - llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, *AII, RI); - else - llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6, - FramePtr, -NumBytes, ARMCC::AL, 0, *AII); - - if (!RI.needsStackRealignment(MF)) return; - - // If there's dynamic realignment, adjust for it. 
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  assert(!AFI->isThumb1OnlyFunction());
-
-  // Emit bic r6, r6, MaxAlign
-  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-  AddDefaultCC(
-    AddDefaultPred(
-      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
-      .addReg(ARM::R6, RegState::Kill)
-      .addImm(MaxAlign - 1)));
-}
-
 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
 /// registers the function context.
 void ARMTargetLowering::
@@ -5636,8 +5589,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
     MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                              MachineMemOperand::MOStore, 4, 4);
 
-  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
   // Load the address of the dispatch MBB into the jump buffer.
   if (isThumb2) {
     // Incoming value: jbuf
@@ -5811,6 +5762,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
                            MachineMemOperand::MOLoad |
                            MachineMemOperand::MOVolatile, 4, 4);
 
+  BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup));
+
   unsigned NumLPads = LPadList.size();
   if (Subtarget->isThumb2()) {
     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index be6a530..b8dc4bf 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -81,7 +81,6 @@ namespace llvm {
 
       EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
       EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
-      EH_SJLJ_DISPATCHSETUP,  // SjLj exception handling dispatch setup.
 
       TC_RETURN,    // Tail call return pseudo.
 
@@ -146,6 +145,9 @@ namespace llvm {
       VMOVIMM,
       VMVNIMM,
 
+      // Vector move f32 immediate:
+      VMOVFPIMM,
+
       // Vector duplicate:
       VDUP,
       VDUPLANE,
@@ -407,7 +409,6 @@ namespace llvm {
                              ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -517,9 +518,6 @@ namespace llvm {
                                         bool signExtend,
                                         ARMCC::CondCodes Cond) const;
 
-    void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
-                                      MachineBasicBlock *DispatchBB) const;
-
     void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
                                 MachineBasicBlock *DispatchBB, int FI) const;
 
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 06ee2c8..6940156 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2041,9 +2041,26 @@ multiclass VFPDT64InstAlias<string opc, string asm, dag Result> {
   def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
   defm : VFPDT64ReqInstAlias<opc, asm, Result>;
 }
+multiclass VFPDT64NoF64ReqInstAlias<string opc, string asm, dag Result> {
+  def I64 : VFPDataTypeInstAlias<opc, ".i64", asm, Result>;
+  def S64 : VFPDataTypeInstAlias<opc, ".s64", asm, Result>;
+  def U64 : VFPDataTypeInstAlias<opc, ".u64", asm, Result>;
+  def D : VFPDataTypeInstAlias<opc, ".d", asm, Result>;
+}
+// VFPDT64NoF64ReqInstAlias plus plain ".64"
+multiclass VFPDT64NoF64InstAlias<string opc, string asm, dag Result> {
+  def _64 : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+  defm : VFPDT64NoF64ReqInstAlias<opc, asm, Result>;
+}
multiclass VFPDTAnyInstAlias<string opc, string asm, dag
Result> { defm : VFPDT8InstAlias<opc, asm, Result>; defm : VFPDT16InstAlias<opc, asm, Result>; defm : VFPDT32InstAlias<opc, asm, Result>; defm : VFPDT64InstAlias<opc, asm, Result>; } +multiclass VFPDTAnyNoF64InstAlias<string opc, string asm, dag Result> { + defm : VFPDT8InstAlias<opc, asm, Result>; + defm : VFPDT16InstAlias<opc, asm, Result>; + defm : VFPDT32InstAlias<opc, asm, Result>; + defm : VFPDT64NoF64InstAlias<opc, asm, Result>; +} diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 770703c..be03924 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>; - def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, @@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; -def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP", - SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>; - def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, [SDNPHasChain]>; @@ -475,6 +470,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm let EncoderMethod = "getSORegRegOpValue"; let PrintMethod = "printSORegRegOperand"; let DecoderMethod = "DecodeSORegRegOperand"; + let ParserMatchClass = ShiftedRegAsmOperand; let MIOperandInfo = (ops GPR, GPR, i32imm); } @@ -485,6 +481,7 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm let EncoderMethod = "getSORegImmOpValue"; let PrintMethod = "printSORegImmOperand"; let DecoderMethod = "DecodeSORegImmOperand"; + let ParserMatchClass = ShiftedImmAsmOperand; let MIOperandInfo = (ops GPR, i32imm); } @@ -1555,7 +1552,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, } // Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These psuedos use a hand-written selection code). +// (These pseudos use a hand-written selection code). let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), (ins GPR:$addr, GPR:$src1, GPR:$src2), @@ -4673,11 +4670,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), // This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are // handled when the pseudo is expanded (which happens before any passes // that need the instruction size). 
-let isBarrier = 1, hasSideEffects = 1 in -def Int_eh_sjlj_dispatchsetup : - PseudoInst<(outs), (ins GPR:$src), NoItinerary, - [(ARMeh_sjlj_dispatchsetup GPR:$src)]>, - Requires<[IsDarwin]>; +let isBarrier = 1 in +def eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -5023,3 +5017,43 @@ def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm", def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; +def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm", + (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; +def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm", + (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm", + (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm", + (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm", + (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +// shifter instructions also support a two-operand form. +def : ARMInstAlias<"asr${s}${p} $Rm, $imm", + (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"lsr${s}${p} $Rm, $imm", + (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"lsl${s}${p} $Rm, $imm", + (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"ror${s}${p} $Rm, $imm", + (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; +def : ARMInstAlias<"asr${s}${p} $Rn, $Rm", + (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm", + (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm", + (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; +def : ARMInstAlias<"ror${s}${p} $Rn, $Rm", + (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, + cc_out:$s)>; + + +// 'mul' instruction can be specified with only two operands. 
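+// For example (a sketch, assuming the alias below): "mul r1, r2" is
+// accepted and expands to "mul r1, r1, r2", matching the two-operand
+// shift aliases above ("asr r0, #2" expands to "asr r0, r0, #2").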
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm", + (MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 49cc254..f2ca963 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,10 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVF32 : Operand<i32> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -173,6 +177,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; @@ -4464,6 +4469,10 @@ def : InstAlias<"vmov${p} $Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; def : InstAlias<"vmov${p} $Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyNoF64InstAlias<"vmov${p}", "$Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -4513,6 +4522,15 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; +def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -4801,6 +4819,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. +let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", @@ -4809,7 +4828,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; +} +let DecoderMethod = "DecodeVCVTQ" in { def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", @@ -4818,6 +4839,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; +} // VCVT : Vector Convert Between Half-Precision and Single-Precision. 
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, @@ -5218,6 +5240,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d8wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d16wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d32wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d64wb_register VecListOneD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; // Load two D registers. defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", @@ -5237,6 +5272,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1q8wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1q16wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1q32wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1q64wb_register VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, + rGPR:$Rm, pred:$p)>; // Load three D registers. defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", @@ -5260,6 +5308,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d8Twb_register VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d16Twb_register VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d32Twb_register VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d64Twb_register VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; // Load four D registers. 
@@ -5284,6 +5345,19 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d8Qwb_register VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d16Qwb_register VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d32Qwb_register VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn, $Rm", + (VLD1d64Qwb_register VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, rGPR:$Rm, pred:$p)>; // VST1 requires a size suffix, but also accepts type specific variants. // Store one D register. @@ -5304,6 +5378,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1d8wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, + VecListOneD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1d16wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, + VecListOneD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1d32wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, + VecListOneD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1d64wb_register zero_reg, addrmode6:$Rn, rGPR:$Rm, + VecListOneD:$Vd, pred:$p)>; // Store two D registers. defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", @@ -5323,6 +5410,19 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +// with writeback, register stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1q8wb_register zero_reg, addrmode6:$Rn, + rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1q16wb_register zero_reg, addrmode6:$Rn, + rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1q32wb_register zero_reg, addrmode6:$Rn, + rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn, $Rm", + (VST1q64wb_register zero_reg, addrmode6:$Rn, + rGPR:$Rm, VecListTwoD:$Vd, pred:$p)>; // FIXME: The three and four register VST1 instructions haven't been moved // to the VecList* encoding yet, so we can't do assembly parsing support @@ -5346,3 +5446,19 @@ defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", // (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; //defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", // (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; + + +// VTRN instructions data type suffix aliases for more-specific types. 
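+// For example (a sketch, assuming these aliases): "vtrn.s8 d0, d1" and
+// "vtrn.u8 d0, d1" are accepted and encode identically to "vtrn.8 d0, d1";
+// only the element size is significant for VTRN.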
+defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Dd, $Dm", + (VTRNd8 DPR:$Dd, DPR:$Dm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Dd, $Dm", + (VTRNd16 DPR:$Dd, DPR:$Dm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Dd, $Dm", + (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>; + +defm : VFPDT8ReqInstAlias <"vtrn${p}", "$Qd, $Qm", + (VTRNq8 QPR:$Qd, QPR:$Qm, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vtrn${p}", "$Qd, $Qm", + (VTRNq16 QPR:$Qd, QPR:$Qm, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vtrn${p}", "$Qd, $Qm", + (VTRNq32 QPR:$Qd, QPR:$Qm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 03077c0..6129fa3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4084,3 +4084,8 @@ def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", // for isel. def : t2InstAlias<"mov${p} $Rd, $imm", (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; + + +// Wide 'mul' encoding can be specified with only two operands. +def : t2InstAlias<"mul${p} $Rn, $Rm", + (t2MUL rGPR:$Rn, rGPR:$Rn, rGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 488c508..e420135 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1172,3 +1172,35 @@ defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; + +// VMUL has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm", + (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm", + (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; +// VADD has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm", + (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm", + (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; +// VSUB has a two-operand form (implied destination operand) +def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm", + (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>; +def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm", + (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>; + +// VMOV can accept optional .f32/.f64 suffix. +def : VFP2InstAlias<"vmov${p}.f32 $Rt, $Sn", + (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.f32 $Sn, $Rt", + (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>; + +def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn", + (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>; +def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2", + (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>; + +// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way +// VMOVD does. 
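+// For example (a sketch): "vmov s0, s1" selects VMOVS directly, while the
+// D-register VFP copy must be written "vmov.f64 d0, d1", since a plain
+// "vmov d0, d1" is matched by the NEON VORR alias.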
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm", + (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index cf1432d..6cbb24b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -38,8 +38,9 @@ extern "C" void LLVMInitializeARMTarget() { /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { @@ -50,8 +51,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:64-i64:32:64-" "v128:32:128-v64:32:64-n32-S32") : @@ -71,8 +73,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM, OL), InstrInfo(Subtarget.hasThumb2() ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), @@ -95,34 +98,30 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { } -bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && EnableGlobalMerge) +bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM) { + if (getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) PM.add(createGlobalMergePass(getTargetLowering())); return false; } -bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createARMISelDag(*this, OptLevel)); +bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM) { + PM.add(createARMISelDag(*this, getOptLevel())); return false; } -bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) + if (getOptLevel() != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); return true; } -bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
- if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); if (Subtarget.hasNEON()) @@ -133,7 +132,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. PM.add(createARMExpandPseudoPass()); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); } @@ -143,8 +142,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, return true; } -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM) { if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb()) PM.add(createThumb2SizeReductionPass()); @@ -153,7 +151,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, } bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. PM.add(createARMJITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index c8c601c..a1f517b 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -41,7 +41,8 @@ private: public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } @@ -50,13 +51,12 @@ public: } // Pass Pipeline Configuration - virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, - JITCodeEmitter &MCE); + virtual bool addPreISel(PassManagerBase &PM); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreRegAlloc(PassManagerBase &PM); + virtual bool addPreSched2(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE); }; /// ARMTargetMachine - ARM target machine. 
@@ -71,7 +71,8 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
 public:
   ARMTargetMachine(const Target &T, StringRef TT,
                    StringRef CPU, StringRef FS,
-                   Reloc::Model RM, CodeModel::Model CM);
+                   Reloc::Model RM, CodeModel::Model CM,
+                   CodeGenOpt::Level OL);
 
   virtual const ARMRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
@@ -111,7 +112,8 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
 public:
   ThumbTargetMachine(const Target &T, StringRef TT,
                      StringRef CPU, StringRef FS,
-                     Reloc::Model RM, CodeModel::Model CM);
+                     Reloc::Model RM, CodeModel::Model CM,
+                     CodeGenOpt::Level OL);
 
   /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
   virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 1d66d12..bb83e5e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1946,18 +1946,15 @@ void ARMOperand::print(raw_ostream &OS) const {
     break;
   case k_ShiftedRegister:
     OS << "<so_reg_reg "
-       << RegShiftedReg.SrcReg
-       << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
-       << ", " << RegShiftedReg.ShiftReg << ", "
-       << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
-       << ">";
+       << RegShiftedReg.SrcReg << " "
+       << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+       << " " << RegShiftedReg.ShiftReg << ">";
     break;
   case k_ShiftedImmediate:
     OS << "<so_reg_imm "
-       << RegShiftedImm.SrcReg
-       << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
-       << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
-       << ">";
+       << RegShiftedImm.SrcReg << " "
+       << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+       << " #" << RegShiftedImm.ShiftImm << ">";
     break;
   case k_RotateImmediate:
     OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -2366,7 +2363,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
   case ARM::Q6:  return ARM::D12;
   case ARM::Q7:  return ARM::D14;
   case ARM::Q8:  return ARM::D16;
-  case ARM::Q9:  return ARM::D19;
+  case ARM::Q9:  return ARM::D18;
   case ARM::Q10: return ARM::D20;
   case ARM::Q11: return ARM::D22;
   case ARM::Q12: return ARM::D24;
@@ -2420,7 +2417,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   while (Parser.getTok().is(AsmToken::Comma) ||
          Parser.getTok().is(AsmToken::Minus)) {
     if (Parser.getTok().is(AsmToken::Minus)) {
-      Parser.Lex(); // Eat the comma.
+      Parser.Lex(); // Eat the minus.
       SMLoc EndLoc = Parser.getTok().getLoc();
       int EndReg = tryParseRegister();
       if (EndReg == -1)
@@ -2487,10 +2484,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 // parse a vector register list
 ARMAsmParser::OperandMatchResultTy ARMAsmParser::
 parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
-  if(Parser.getTok().isNot(AsmToken::LCurly))
+  SMLoc S = Parser.getTok().getLoc();
+  // As an extension (to match gas), support a plain D register or Q register
+  // (without enclosing curly braces) as a single or double entry list,
+  // respectively.
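+  // For example (a sketch of the extension): "vld1.32 d0, [r0]" is parsed
+  // like "vld1.32 {d0}, [r0]", and a bare "q1" is treated as the
+  // two-register list {d2, d3}.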
+ if (Parser.getTok().is(AsmToken::Identifier)) { + int Reg = tryParseRegister(); + if (Reg == -1) + return MatchOperand_NoMatch; + SMLoc E = Parser.getTok().getLoc(); + if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { + Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, S, E)); + return MatchOperand_Success; + } + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, S, E)); + return MatchOperand_Success; + } + Error(S, "vector register expected"); + return MatchOperand_ParseFail; + } + + if (Parser.getTok().isNot(AsmToken::LCurly)) return MatchOperand_NoMatch; - SMLoc S = Parser.getTok().getLoc(); Parser.Lex(); // Eat '{' token. SMLoc RegLoc = Parser.getTok().getLoc(); @@ -2509,7 +2527,39 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { ++Count; } - while (Parser.getTok().is(AsmToken::Comma)) { + while (Parser.getTok().is(AsmToken::Comma) || + Parser.getTok().is(AsmToken::Minus)) { + if (Parser.getTok().is(AsmToken::Minus)) { + Parser.Lex(); // Eat the minus. + SMLoc EndLoc = Parser.getTok().getLoc(); + int EndReg = tryParseRegister(); + if (EndReg == -1) { + Error(EndLoc, "register expected"); + return MatchOperand_ParseFail; + } + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) + EndReg = getDRegFromQReg(EndReg) + 1; + // If the register is the same as the start reg, there's nothing + // more to do. + if (Reg == EndReg) + continue; + // The register must be in the same register class as the first. + if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) { + Error(EndLoc, "invalid register in register list"); + return MatchOperand_ParseFail; + } + // Ranges must go from low to high. + if (Reg > EndReg) { + Error(EndLoc, "bad range in register list"); + return MatchOperand_ParseFail; + } + + // Add all the registers in the range to the register list. + Count += EndReg - Reg; + Reg = EndReg; + continue; + } Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); int OldReg = Reg; @@ -3538,9 +3588,12 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // If we have a '#', it's an immediate offset, else assume it's a register - // offset. - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat the '#'. + // offset. Be friendly and also accept a plain integer (without a leading + // hash) for gas compatibility. + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Integer)) { + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat the '#'. E = Parser.getTok().getLoc(); bool isNegative = getParser().getTok().is(AsmToken::Minus); @@ -4098,6 +4151,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // remove the cc_out operand. (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) || !inITBlock() || (static_cast<ARMOperand*>(Operands[3])->getReg() != static_cast<ARMOperand*>(Operands[5])->getReg() && @@ -4105,6 +4159,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand*>(Operands[4])->getReg()))) return true; + // Also check the 'mul' syntax variant that doesn't specify an explicit + // destination register. 
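+  // For example (a sketch): "mul r8, r2" uses a high register, so the
+  // 32-bit t2MUL encoding is required and cc_out must be removed; low
+  // registers inside an IT block can still use the 16-bit encoding.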
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 && + static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && + static_cast<ARMOperand*>(Operands[3])->isReg() && + static_cast<ARMOperand*>(Operands[4])->isReg() && + // If the registers aren't low regs or the cc_out operand is zero + // outside of an IT block, we have to use the 32-bit encoding, so + // remove the cc_out operand. + (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || + !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + !inITBlock())) + return true; + // Register-register 'add/sub' for thumb does not have a cc_out operand @@ -4542,12 +4610,37 @@ processInstruction(MCInst &Inst, const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { switch (Inst.getOpcode()) { // Handle the MOV complex aliases. + case ARM::ASRr: + case ARM::LSRr: + case ARM::LSLr: + case ARM::RORr: { + ARM_AM::ShiftOpc ShiftTy; + switch(Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ASRr: ShiftTy = ARM_AM::asr; break; + case ARM::LSRr: ShiftTy = ARM_AM::lsr; break; + case ARM::LSLr: ShiftTy = ARM_AM::lsl; break; + case ARM::RORr: ShiftTy = ARM_AM::ror; break; + } + // A shift by zero is a plain MOVr, not a MOVsi. + unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0); + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVsr); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(Inst.getOperand(2)); // Rm + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); // cc_out + Inst = TmpInst; + return true; + } case ARM::ASRi: case ARM::LSRi: case ARM::LSLi: case ARM::RORi: { ARM_AM::ShiftOpc ShiftTy; - unsigned Amt = Inst.getOperand(2).getImm(); switch(Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode!"); case ARM::ASRi: ShiftTy = ARM_AM::asr; break; @@ -4556,6 +4649,7 @@ processInstruction(MCInst &Inst, case ARM::RORi: ShiftTy = ARM_AM::ror; break; } // A shift by zero is a plain MOVr, not a MOVsi. + unsigned Amt = Inst.getOperand(2).getImm(); unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi; unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt); MCInst TmpInst; @@ -4570,6 +4664,19 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + case ARM::RRXi: { + unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0); + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVsi); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + TmpInst.addOperand(Inst.getOperand(4)); // cc_out + Inst = TmpInst; + return true; + } case ARM::t2LDMIA_UPD: { // If this is a load of a single register, then we should use // a post-indexed LDR instruction instead, per the ARM ARM. 
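A hedged sketch of the net assembler behavior the ARMAsmParser changes above
add (the mnemonic forms come from the aliases; the canonical output is what
processInstruction emits):

    asr r0, r1, r2         @ matched as ASRr, emitted as "mov r0, r1, asr r2" (MOVsr)
    lsl r3, #5             @ two-operand alias for "lsl r3, r3, #5", emitted as MOVsi
    rrx r4, r5             @ matched as RRXi, emitted as "mov r4, r5, rrx" (MOVsi)
    vld1.32 {d0-d3}, [r0]  @ D-register ranges are now accepted in vector lists
    vmov.f32 d0, #1.0      @ NEON VMOV (immediate, F32) now assembles directly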
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index baa55f2..511932e 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -62,8 +62,8 @@ add_llvm_library_dependencies(LLVMARMCodeGen LLVMTarget ) -# workaround for hanging compilation on MSVC10 -if( MSVC_VERSION EQUAL 1600 ) +# workaround for hanging compilation on MSVC9, 10 +if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 ) set_property( SOURCE ARMISelLowering.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 0b9b5d0..ad250ab 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -179,8 +179,6 @@ static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val, @@ -251,6 +249,11 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); @@ -1921,12 +1924,6 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn, } -static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Val)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -4085,3 +4082,60 @@ static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, return S; } + +static DecodeStatus DecodeVCVTD(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction32(Insn, 16, 6); + unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + + DecodeStatus S = MCDisassembler::Success; + + // VMOVv2f32 is ambiguous with these decodings. 
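+  // (A sketch of why: when imm<5:3> is zero the bit pattern overlaps the
+  // VMOV (immediate) encoding with cmode == 0b1111, so it is decoded as the
+  // f32 immediate form rather than as a fixed-point VCVT.)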
+  if (!(imm & 0x38) && cmode == 0xF) {
+    Inst.setOpcode(ARM::VMOVv2f32);
+    return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+  }
+
+  if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+  return S;
+}
+
+static DecodeStatus DecodeVCVTQ(llvm::MCInst &Inst, unsigned Insn,
+                                uint64_t Address, const void *Decoder) {
+  unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+  unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+  unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+  unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+  DecodeStatus S = MCDisassembler::Success;
+
+  // VMOVv4f32 is ambiguous with these decodings.
+  if (!(imm & 0x38) && cmode == 0xF) {
+    Inst.setOpcode(ARM::VMOVv4f32);
+    return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+  }
+
+  if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+  if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+  return S;
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1bc585b..62d04c4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -60,7 +60,7 @@ public:
 // ARMFixupKinds.h.
 //
 // Name                      Offset (bits) Size (bits)     Flags
-{ "fixup_arm_ldst_pcrel_12", 1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0,            32,  MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_ldst_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel |
                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_pcrel_10",      0,            32,  MCFixupKindInfo::FKF_IsPCRel },
@@ -68,7 +68,7 @@ public:
                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_thumb_adr_pcrel_10",0,            8,   MCFixupKindInfo::FKF_IsPCRel |
                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12",  1,            24,  MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12",  0,            32,  MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_adr_pcrel_12",   0,            32,  MCFixupKindInfo::FKF_IsPCRel |
                                   MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_condbranch",    0,            24,  MCFixupKindInfo::FKF_IsPCRel },
@@ -138,7 +138,7 @@ bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
  const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
  const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
  const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
-  const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+  const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
  if (isThumb()) {
    const uint16_t nopEncoding = hasNOP() ?
Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 6042b11..e86f48e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -129,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { Triple TheTriple(TT); // Default relocation model on Darwin is PIC, not DynamicNoPIC. RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; } - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 218311d..de33bd6 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -18,7 +18,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" #include "Thumb1InstrInfo.h" @@ -60,8 +59,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); @@ -89,8 +87,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index cf040c8..7ec3c0e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" @@ -130,8 +129,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOStore, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); @@ -158,8 +156,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); diff --git a/lib/Target/CBackend/CBackend.cpp 
b/lib/Target/CBackend/CBackend.cpp index 06e812b..8bce52c 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -3604,7 +3604,6 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 4f1ca97..ca346af 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -22,13 +22,13 @@ namespace llvm { struct CTargetMachine : public TargetMachine { CTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp index d5af2a8..5ce14c9 100644 --- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp +++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -62,11 +62,12 @@ static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); // For the time being, use static relocations, since there's really no // support for PIC yet. - X->InitMCCodeGenInfo(Reloc::Static, CM); + X->InitMCCodeGenInfo(Reloc::Static, CM, OL); return X; } diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 99837df..a851be3 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp index 93a7f6e..6940316 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.cpp +++ b/lib/Target/CellSPU/SPUTargetMachine.cpp @@ -34,8 +34,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const { SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), @@ -49,8 +50,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool SPUTargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. 
PM.add(createSPUISelDag(*this)); return false; @@ -58,7 +58,7 @@ bool SPUTargetMachine::addInstSelector(PassManagerBase &PM, // passes to run just before printing the assembly bool SPUTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +addPreEmitPass(PassManagerBase &PM) { // load the TCE instruction scheduler, if available via // loaded plugins typedef llvm::FunctionPass* (*BuilderFunc)(const char*); diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index fffe77c..909f12e 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -40,7 +40,8 @@ class SPUTargetMachine : public LLVMTargetMachine { public: SPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); /// Return the subtarget implementation object virtual const SPUSubtarget *getSubtargetImpl() const { @@ -81,9 +82,8 @@ public: } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &); }; } // end namespace llvm diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 394ea2b..efeb989 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -2065,7 +2065,6 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 287e537..a3613b4 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -24,13 +24,13 @@ class formatted_raw_ostream; struct CPPTargetMachine : public TargetMachine { CPPTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : TargetMachine(T, TT, CPU, FS) {} virtual bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index 7bff53e..4ad7bd6 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -34,8 +34,9 @@ extern "C" void LLVMInitializeMBlazeTarget() { MBlazeTargetMachine:: MBlazeTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL): + LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), @@ -46,8 +47,7 @@ MBlazeTargetMachine(const Target &T, StringRef TT, // Install an instruction selector pass using // the ISelDag to gen MBlaze code. 
-bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createMBlazeISelDag(*this)); return false; } @@ -55,8 +55,7 @@ bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM, // Implemented by targets that want to run passes immediately before // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. -bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM) { PM.add(createMBlazeDelaySlotFillerPass(*this)); return true; } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index c1bc08a..1c1aa53 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -43,7 +43,8 @@ namespace llvm { public: MBlazeTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -77,8 +78,8 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt); - virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp index 43ae281..a3a5cf4 100644 --- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp +++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp @@ -62,13 +62,14 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) RM = Reloc::Static; if (CM == CodeModel::Default) CM = CodeModel::Small; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index fda70b8..0d532e3 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -51,9 +51,10 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 9daeb2a..5c94137 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -29,7 +29,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp 
b/lib/Target/MSP430/MSP430InstrInfo.cpp index ffd4318..81f766e 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -19,7 +19,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -43,8 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); @@ -72,8 +70,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 4dd8933..fe185fb 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -28,8 +28,9 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), // FIXME: Check TargetData string. DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), @@ -37,15 +38,13 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, FrameLowering(Subtarget) { } -bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. - PM.add(createMSP430ISelDag(*this, OptLevel)); + PM.add(createMSP430ISelDag(*this, getOptLevel())); return false; } -bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM) { // Must run branch selection immediately preceding the asm printer. PM.add(createMSP430BranchSelectionPass()); return false; diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index eb483dc..4fb060f 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -40,7 +40,8 @@ class MSP430TargetMachine : public LLVMTargetMachine { public: MSP430TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; @@ -61,8 +62,8 @@ public: return &TSInfo; } - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; // MSP430TargetMachine. 
} // end namespace llvm diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 53656d4d..ac9cfc0 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(MipsCodeGen MipsISelLowering.cpp MipsFrameLowering.cpp MipsMCInstLower.cpp - MipsMCSymbolRefExpr.cpp MipsRegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 4f017d0..7bc5fe4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -58,6 +58,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { switch (Kind) { default: break; + case FK_GPRel_4: case FK_Data_4: Value &= 0xffffffff; break; @@ -68,6 +69,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case Mips::fixup_Mips_PC16: Value &= 0x0000ffff; break; + case Mips::fixup_Mips_HI16: + Value >>= 16; + break; } return Value; @@ -104,15 +108,17 @@ public: llvm_unreachable("Unknown fixup kind!"); case Mips::fixup_Mips_GOT16: // This will be fixed up at link time break; + case FK_GPRel_4: case FK_Data_4: case Mips::fixup_Mips_26: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_PC16: + case Mips::fixup_Mips_HI16: // For each byte of the fragment that the fixup touches, mask in the fixup value. The Value has been "split up" into the appropriate bitfields above. for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + Data[Offset + i] += uint8_t((Value >> (i * 8)) & 0xff); break; } } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 1115fec..0c3cbb3 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -173,11 +173,21 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, } else if (MO.isExpr()) { const MCExpr *Expr = MO.getExpr(); MCExpr::ExprKind Kind = Expr->getKind(); + unsigned Ret = 0; + + if (Kind == MCExpr::Binary) { + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr); + Expr = BE->getLHS(); + Kind = Expr->getKind(); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS()); + assert((Kind == MCExpr::SymbolRef) && CE && + "Binary expression must be sym+const."); + Ret = CE->getValue(); + } + if (Kind == MCExpr::SymbolRef) { - Mips::Fixups FixupKind = Mips::fixup_Mips_NONE; - MCSymbolRefExpr::VariantKind SymRefKind = - cast<MCSymbolRefExpr>(Expr)->getKind(); - switch(SymRefKind) { + Mips::Fixups FixupKind; + switch(cast<MCSymbolRefExpr>(Expr)->getKind()) { case MCSymbolRefExpr::VK_Mips_GPREL: FixupKind = Mips::fixup_Mips_GPREL16; break; @@ -206,12 +216,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, FixupKind = Mips::fixup_Mips_TPREL_LO; break; default: - return 0; + return Ret; } // switch Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); } // if SymbolRef // All of the information is in the fixup.
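Two easy-to-miss details in the MipsAsmBackend hunk above: fixup_Mips_HI16 is reduced with a plain right shift, and applyFixup now adds each fixup byte into the fragment ('+=') instead of OR-ing it ('|='), so a constant addend already sitting in the instruction's immediate field (the sym+const case the code-emitter change handles) is folded in rather than corrupted. A self-contained model of that difference, not the real MCAsmBackend interface:

#include <cstdint>
#include <cstdio>

static void applyFixup(uint8_t *Data, uint64_t Value, bool UseAdd) {
  // Byte-wise, as in the patch; carries between bytes are not propagated.
  for (unsigned i = 0; i != 4; ++i) {
    uint8_t B = uint8_t((Value >> (i * 8)) & 0xff);
    if (UseAdd)
      Data[i] += B;   // new behavior: fold into any existing addend
    else
      Data[i] |= B;   // old behavior: bitwise merge
  }
}

int main() {
  // Instruction word whose 16-bit immediate field already holds the
  // constant addend 0x0003 (little-endian bytes; 0x3c04 is a lui opcode).
  uint8_t OrWord[4]  = {0x03, 0x00, 0x04, 0x3c};
  uint8_t AddWord[4] = {0x03, 0x00, 0x04, 0x3c};

  // %hi-style fixup value for a symbol at 0x00030000: shifted right by 16.
  uint64_t Hi16 = 0x00030000 >> 16;

  applyFixup(OrWord, Hi16, false);   // 0x0003 | 0x0003 == 0x0003, addend lost
  applyFixup(AddWord, Hi16, true);   // 0x0003 + 0x0003 == 0x0006, addend kept

  printf("or: %02x%02x  add: %02x%02x\n", OrWord[1], OrWord[0],
         AddWord[1], AddWord[0]);
}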
- return 0; + return Ret; } llvm_unreachable("Unable to encode MCOperand!"); // Not reached @@ -234,15 +244,22 @@ MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, unsigned MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { - // FIXME: implement - return 0; + assert(MI.getOperand(OpNo).isImm()); + unsigned szEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + return szEncoding - 1; } +// FIXME: should be called getMSBEncoding +// unsigned MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { - // FIXME: implement - return 0; + assert(MI.getOperand(OpNo-1).isImm()); + assert(MI.getOperand(OpNo).isImm()); + unsigned pos = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups); + unsigned sz = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups); + + return pos + sz - 1; } #include "MipsGenMCCodeEmitter.inc" diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index e6040e4..1fec88a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -63,11 +63,12 @@ static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) RM = Reloc::PIC_; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 3c97241..b0fb4fa 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -175,6 +175,7 @@ def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions +def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>; def JAL64 : JumpLink64<0x03, "jal">; def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; @@ -231,7 +232,24 @@ let Predicates = [IsN64] in { } // hi/lo relocs -def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; +def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; +def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>; +def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>; +def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>; + +def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>; +def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>; +def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>; +def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>; + +def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)), + (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)), + (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)), + (DADDiu CPU64Regs:$hi, tjumptable:$lo)>; +def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)), + (DADDiu CPU64Regs:$hi, tconstpool:$lo)>; defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64, ZERO_64>; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 186a5e3..d27e3ab 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ 
b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -18,7 +18,6 @@ #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" -#include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -79,12 +78,19 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Enclose unaligned load or store with .macro & .nomacro directives. if (isUnalignedLoadStore(Opc)) { - MCInst Directive; - Directive.setOpcode(Mips::MACRO); - OutStreamer.EmitInstruction(Directive); - OutStreamer.EmitInstruction(TmpInst0); - Directive.setOpcode(Mips::NOMACRO); - OutStreamer.EmitInstruction(Directive); + if (OutStreamer.hasRawTextSupport()) { + MCInst Directive; + Directive.setOpcode(Mips::MACRO); + OutStreamer.EmitInstruction(Directive); + OutStreamer.EmitInstruction(TmpInst0); + Directive.setOpcode(Mips::NOMACRO); + OutStreamer.EmitInstruction(Directive); + } else { + MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts); + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); + } return; } @@ -92,8 +98,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Lower CPLOAD and CPRESTORE if (Opc == Mips::CPLOAD) { MCInstLowering.LowerCPLOAD(MI, MCInsts); - for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); - I != MCInsts.end(); ++I) + for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin(); I + != MCInsts.end(); ++I) OutStreamer.EmitInstruction(*I); return; } @@ -102,7 +108,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInstLowering.LowerCPRESTORE(MI, TmpInst0); OutStreamer.EmitInstruction(TmpInst0); return; - } + } } OutStreamer.EmitInstruction(TmpInst0); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 16461ff..f0c6626 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -27,9 +27,11 @@ class MachineBasicBlock; class Module; class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter { - const MipsSubtarget *Subtarget; - + public: + + const MipsSubtarget *Subtarget; + explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer) { Subtarget = &TM.getSubtarget<MipsSubtarget>(); } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 19bb1a5..36aef99 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -152,6 +152,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { bool ATUsed; unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP; unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; @@ -169,13 +172,14 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MFI->setStackSize(StackSize); BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // Emit instructions that set $gp using the value of $t9. // O32 uses the directive .cpload while N32/64 requires three instructions to // do this. // TODO: Do not emit these instructions if no instructions use $gp.
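The interesting part of the EmitInstruction hunk above: .macro/.nomacro exist only as text, so when the streamer cannot carry raw text (direct object emission) the unaligned-access pseudo has to be expanded into real instructions through the new LowerUnalignedLoadStore. A compressed stand-in for that dispatch; the instruction strings are hypothetical and mirror the little-endian ULW expansion added to MipsMCInstLower.cpp further down:

#include <iostream>
#include <string>
#include <vector>

struct Inst { std::string Text; };

// Stand-in for MCStreamer: only raw-text streamers (.s output) can carry
// directives like .macro/.nomacro through to the output.
struct Streamer {
  bool RawText;
  bool hasRawTextSupport() const { return RawText; }
  void emit(const Inst &I) { std::cout << I.Text << "\n"; }
};

// Hypothetical expansion of an unaligned little-endian word load, shaped
// like the ULW case handled below.
static std::vector<Inst> lowerUnaligned(const Inst &) {
  return {{"lwl $1, 3($4)"}, {"lwr $1, 0($4)"}, {"addu $2, $1, $zero"}};
}

static void emitUnaligned(Streamer &S, const Inst &Pseudo) {
  if (S.hasRawTextSupport()) {       // textual assembly: keep the macro form
    S.emit({".macro"});
    S.emit(Pseudo);
    S.emit({".nomacro"});
  } else {                           // object emission: expand the pseudo
    for (const Inst &I : lowerUnaligned(Pseudo))
      S.emit(I);
  }
}

int main() {
  Inst ULW{"ulw $2, 0($4)"};
  Streamer Asm{true}, Obj{false};
  emitUnaligned(Asm, ULW);
  emitUnaligned(Obj, ULW);
}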
if (isPIC && STI.isABI_O32()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) + BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::CPLOAD)) .addReg(RegInfo->getPICCallReg()); else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) { // lui $28,%hi(%neg(%gp_rel(fname))) @@ -189,8 +193,6 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); } - BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); - // No need to allocate space on the stack. if (StackSize == 0 && !MFI->adjustsStack()) return; @@ -199,10 +201,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineLocation DstML, SrcML; // Adjust stack : addi sp, sp, (-imm) - ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB, - MBBI); - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(NewReg).addImm(NewImm); + ATUsed = expandRegLargeImmPair(SP, -StackSize, NewReg, NewImm, MBB, MBBI); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); // FIXME: change this when mips goes MC. if (ATUsed) @@ -262,14 +262,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) - .addReg(Mips::SP).addReg(Mips::ZERO); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); // emit ".cfi_def_cfa_register $fp" MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(Mips::FP); + DstML = MachineLocation(FP); SrcML = MachineLocation(MachineLocation::VirtualFP); Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); } @@ -293,6 +292,11 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, const MipsInstrInfo &TII = *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; // Get the number of bytes from FrameInfo unsigned StackSize = MFI->getStackSize(); @@ -310,16 +314,13 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, --I; // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP) - .addReg(Mips::FP).addReg(Mips::ZERO); + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); } // adjust stack : insert addi sp, sp, (imm) if (StackSize) { - ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB, - MBBI); - BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(NewReg).addImm(NewImm); + ATUsed = expandRegLargeImmPair(SP, StackSize, NewReg, NewImm, MBB, MBBI); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(NewReg).addImm(NewImm); // FIXME: change this when mips goes MC. if (ATUsed) @@ -331,13 +332,15 @@ void MipsFrameLowering:: processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineRegisterInfo& MRI = MF.getRegInfo(); + unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; + unsigned FP = STI.isABI_N64() ?
Mips::FP_64 : Mips::FP; // FIXME: remove this code if register allocator can correctly mark // $fp and $ra used or unused. // Mark $fp and $ra as used or unused. if (hasFP(MF)) - MRI.setPhysRegUsed(Mips::FP); + MRI.setPhysRegUsed(FP); // The register allocator might determine $ra is used after seeing // instruction "jr $ra", but we do not want PrologEpilogInserter to insert @@ -345,7 +348,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // To correct this, $ra is explicitly marked unused if there is no // function call. if (MF.getFrameInfo()->hasCalls()) - MRI.setPhysRegUsed(Mips::RA); + MRI.setPhysRegUsed(RA); else - MRI.setPhysRegUnused(Mips::RA); + MRI.setPhysRegUnused(RA); } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index b595f03..b5a15cf 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -127,9 +127,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); @@ -1506,7 +1508,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { // %hi/%lo relocation SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI); @@ -1517,16 +1519,17 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); } - SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_GOT); - BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset); - SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true, - MipsII::MO_ABS_LO); - SDValue Load = DAG.getLoad(MVT::i32, dl, + EVT ValTy = Op.getValueType(); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = IsN64 ? 
MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag); + BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, BAGOTOffset); + SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag); + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset, MachinePointerInfo(), false, false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); - return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset); + return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } SDValue MipsTargetLowering:: @@ -1649,16 +1652,19 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_GOT); - CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP); - SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), + EVT ValTy = Op.getValueType(); + unsigned GOTFlag = IsN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; + unsigned OFSTFlag = IsN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; + SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), + N->getOffset(), GOTFlag); + CP = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, CP); + SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, false, 0); - SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); - ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(), + N->getOffset(), OFSTFlag); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo); + ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } return ResNode; @@ -2063,6 +2069,7 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, bool IsRegLoc = VA.isRegLoc(); unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. unsigned LocMemOffset = 0; + unsigned MemCpySize = ByValSize; if (!IsRegLoc) LocMemOffset = VA.getLocMemOffset(); @@ -2082,9 +2089,13 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); } + // Return if the struct has been fully copied. + if (!(MemCpySize = ByValSize - Offset)) + return; + // If there is an argument register available, copy the remainder of the // byval argument with sub-doubleword loads and shifts. - if ((Reg != RegEnd) && (ByValSize != Offset)) { + if (Reg != RegEnd) { assert((ByValSize < Offset + 8) && "Size of the remainder should be smaller than 8-byte."); SDValue Val; @@ -2119,19 +2130,18 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, } } - unsigned MemCpySize = ByValSize - Offset; - if (MemCpySize) { - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. 
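The N64 branches added to LowerBlockAddress and LowerConstantPool above replace the single GOT-entry-plus-%lo sequence with a GOT page load (MO_GOT_PAGE) followed by an in-page offset add (MO_GOT_OFST). The arithmetic those relocations are expected to satisfy, sketched with a made-up address; the 0x8000 page bias is the customary convention and an assumption here, not something this patch spells out:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Sym  = 0x120008a4cULL;                      // hypothetical address
  uint64_t Page = (Sym + 0x8000) & ~uint64_t(0xffff);  // held by the GOT slot
  int16_t  Ofst = (int16_t)(Sym - Page);               // signed in-page delta
  // ld     $v0, %got_page(sym)($gp)  -> loads Page
  // daddiu $v0, $v0, %got_ofst(sym)  -> adds Ofst
  uint64_t Rebuilt = Page + (int64_t)Ofst;
  printf("page 0x%llx + ofst %d == sym: %d\n",
         (unsigned long long)Page, (int)Ofst, Rebuilt == Sym);
}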
- SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); - } + assert(MemCpySize && "MemCpySize must not be zero."); + + // Create a fixed object on stack at offset LocMemOffset and copy + // remainder of byval arg to it with memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); } /// LowerCall - functions arguments are copied from virtual regs to diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 5dca9b6..0ae94ab 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -145,7 +145,9 @@ def brtarget : Operand<OtherVT> { let EncoderMethod = "getBranchTargetOpValue"; let OperandType = "OPERAND_PCREL"; } -def calltarget : Operand<i32>; +def calltarget : Operand<iPTR> { + let EncoderMethod = "getJumpTargetOpValue"; +} def calltarget64: Operand<i64>; def simm16 : Operand<i32>; def simm16_64 : Operand<i64>; @@ -378,6 +380,22 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC, let isPseudo = Pseudo; } +// Memory Load/Store +let canFoldAsLoad = 1 in +class LoadX<bits<6> op, RegisterClass RC, + Operand MemOpnd>: + FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), + "", + [], IILoad> { +} + +class StoreX<bits<6> op, RegisterClass RC, + Operand MemOpnd>: + FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), + "", + [], IIStore> { +} + // 32-bit load. multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { @@ -396,6 +414,13 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[IsN64]>; } +// 32-bit load. +multiclass LoadX32<bits<6> op> { + def #NAME# : LoadX<op, CPURegs, mem>, + Requires<[NotN64]>; + def _P8 : LoadX<op, CPURegs, mem64>, + Requires<[IsN64]>; +} // 32-bit store. multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode, bit Pseudo = 0> { @@ -414,6 +439,14 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode, Requires<[IsN64]>; } +// 32-bit store. 
+multiclass StoreX32<bits<6> op> { + def #NAME# : StoreX<op, CPURegs, mem>, + Requires<[NotN64]>; + def _P8 : StoreX<op, CPURegs, mem64>, + Requires<[IsN64]>; +} + // Conditional Branch class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>: CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16), @@ -458,10 +491,11 @@ class JumpFJ<bits<6> op, string instr_asm>: FJ<op, (outs), (ins jmptarget:$target), !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>; -let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in -class JumpFR<bits<6> op, bits<6> func, string instr_asm>: - FR<op, func, (outs), (ins CPURegs:$rs), - !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> { +let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1, + isIndirectBranch = 1 in +class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>: + FR<op, func, (outs), (ins RC:$rs), + !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> { let rt = 0; let rd = 0; let shamt = 0; @@ -760,6 +794,12 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>; defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>; defm USW : StoreM32<0x2b, "usw", store_u, 1>; +/// Primitives for unaligned +defm LWL : LoadX32<0x22>; +defm LWR : LoadX32<0x26>; +defm SWL : StoreX32<0x2A>; +defm SWR : StoreX32<0x2E>; + let hasSideEffects = 1 in def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", [(MipsSync imm:$stype)], NoItinerary, FrmOther> @@ -779,8 +819,7 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -let isIndirectBranch = 1 in - def JR : JumpFR<0x00, 0x08, "jr">; +def JR : JumpFR<0x00, 0x08, "jr", CPURegs>; def JAL : JumpLink<0x03, "jal">; def JALR : JumpLinkReg<0x00, 0x09, "jalr">; def BEQ : CBranch<0x04, "beq", seteq, CPURegs>; @@ -898,20 +937,20 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; +def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; +def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; + def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>; def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>; +def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; +def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; + def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), (ADDiu CPURegs:$hi, tblockaddress:$lo)>; - -def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>; -def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), (ADDiu CPURegs:$hi, tjumptable:$lo)>; - -def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>; -def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)), (ADDiu CPURegs:$hi, tconstpool:$lo)>; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 1fab52c..6fc2af1 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" + using namespace llvm; MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, @@ -55,34 +56,34 @@ 
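The regrouped hi/lo patterns above, and their new 64-bit counterparts in Mips64InstrInfo.td, all rest on the same decomposition: LUi materializes the high half while ADDiu/DADDiu sign-extends the low half, so the %hi value must absorb a carry whenever bit 15 of the address is set. Worked on a concrete value (this is the standard assembler-side convention, not arithmetic the patterns perform themselves):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Addr = 0x0040a9f4;                    // arbitrary example address
  uint32_t Hi = (Addr + 0x8000) >> 16;           // %hi, with the carry bias
  int16_t  Lo = (int16_t)(Addr & 0xffff);        // %lo; addiu sign-extends it
  uint32_t Rebuilt = (Hi << 16) + (int32_t)Lo;   // lui $r,Hi ; addiu $r,$r,Lo
  printf("rebuilt=0x%08x ok=%d\n", Rebuilt, Rebuilt == Addr);
}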
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, } switch (MOTy) { - case MachineOperand::MO_MachineBasicBlock: - Symbol = MO.getMBB()->getSymbol(); - break; - - case MachineOperand::MO_GlobalAddress: - Symbol = Mang->getSymbol(MO.getGlobal()); - break; - - case MachineOperand::MO_BlockAddress: - Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); - break; - - case MachineOperand::MO_ExternalSymbol: - Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); - break; - - case MachineOperand::MO_JumpTableIndex: - Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); - break; - - case MachineOperand::MO_ConstantPoolIndex: - Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); - if (MO.getOffset()) - Offset += MO.getOffset(); - break; - - default: - llvm_unreachable("<unknown operand type>"); + case MachineOperand::MO_MachineBasicBlock: + Symbol = MO.getMBB()->getSymbol(); + break; + + case MachineOperand::MO_GlobalAddress: + Symbol = Mang->getSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_BlockAddress: + Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()); + break; + + case MachineOperand::MO_ExternalSymbol: + Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + Symbol = AsmPrinter.GetJTISymbol(MO.getIndex()); + break; + + case MachineOperand::MO_ConstantPoolIndex: + Symbol = AsmPrinter.GetCPISymbol(MO.getIndex()); + if (MO.getOffset()) + Offset += MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); } const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); @@ -145,8 +146,8 @@ void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) { OutMI.addOperand(MCOperand::CreateImm(MO.getImm())); } - -MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { +MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO, + unsigned offset) const { MachineOperandType MOTy = MO.getType(); switch (MOTy) { @@ -158,14 +159,14 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { if (MO.isImplicit()) break; return MCOperand::CreateReg(MO.getReg()); case MachineOperand::MO_Immediate: - return MCOperand::CreateImm(MO.getImm()); + return MCOperand::CreateImm(MO.getImm() + offset); case MachineOperand::MO_MachineBasicBlock: case MachineOperand::MO_GlobalAddress: case MachineOperand::MO_ExternalSymbol: case MachineOperand::MO_JumpTableIndex: case MachineOperand::MO_ConstantPoolIndex: case MachineOperand::MO_BlockAddress: - return LowerSymbolOperand(MO, MOTy, 0); + return LowerSymbolOperand(MO, MOTy, offset); } return MCOperand(); @@ -182,3 +183,116 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.addOperand(MCOp); } } + +void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI, + SmallVector<MCInst, + 4>& MCInsts) { + unsigned Opc = MI->getOpcode(); + MCInst instr1, instr2, instr3, move; + + bool two_instructions = false; + + assert(MI->getNumOperands() == 3); + assert(MI->getOperand(0).isReg()); + assert(MI->getOperand(1).isReg()); + + MCOperand target = LowerOperand(MI->getOperand(0)); + MCOperand base = LowerOperand(MI->getOperand(1)); + MCOperand atReg = MCOperand::CreateReg(Mips::AT); + MCOperand zeroReg = MCOperand::CreateReg(Mips::ZERO); + + MachineOperand unloweredName = MI->getOperand(2); + MCOperand name = LowerOperand(unloweredName); + + move.setOpcode(Mips::ADDu); + move.addOperand(target); + move.addOperand(atReg); + 
move.addOperand(zeroReg); + + switch (Opc) { + case Mips::ULW: { + // FIXME: only works for little endian right now + MCOperand adj_name = LowerOperand(unloweredName, 3); + if (base.getReg() == (target.getReg())) { + instr1.setOpcode(Mips::LWL); + instr1.addOperand(atReg); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LWR); + instr2.addOperand(atReg); + instr2.addOperand(base); + instr2.addOperand(name); + instr3 = move; + } else { + two_instructions = true; + instr1.setOpcode(Mips::LWL); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LWR); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + } + break; + } + case Mips::ULHu: { + // FIXME: only works for little endian right now + MCOperand adj_name = LowerOperand(unloweredName, 1); + instr1.setOpcode(Mips::LBu); + instr1.addOperand(atReg); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::LBu); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + instr3.setOpcode(Mips::INS); + instr3.addOperand(target); + instr3.addOperand(atReg); + instr3.addOperand(MCOperand::CreateImm(0x8)); + instr3.addOperand(MCOperand::CreateImm(0x18)); + break; + } + + case Mips::USW: { + // FIXME: only works for little endian right now + assert (base.getReg() != target.getReg()); + two_instructions = true; + MCOperand adj_name = LowerOperand(unloweredName, 3); + instr1.setOpcode(Mips::SWL); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(adj_name); + instr2.setOpcode(Mips::SWR); + instr2.addOperand(target); + instr2.addOperand(base); + instr2.addOperand(name); + break; + } + case Mips::USH: { + MCOperand adj_name = LowerOperand(unloweredName, 1); + instr1.setOpcode(Mips::SB); + instr1.addOperand(target); + instr1.addOperand(base); + instr1.addOperand(name); + instr2.setOpcode(Mips::SRL); + instr2.addOperand(atReg); + instr2.addOperand(target); + instr2.addOperand(MCOperand::CreateImm(8)); + instr3.setOpcode(Mips::SB); + instr3.addOperand(atReg); + instr3.addOperand(base); + instr3.addOperand(adj_name); + break; + } + default: + // FIXME: need to add others + assert(0 && "unaligned instruction not processed"); + } + + MCInsts.push_back(instr1); + MCInsts.push_back(instr2); + if (!two_instructions) MCInsts.push_back(instr3); +} + diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 3a24da2..98e37e4 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -37,10 +37,12 @@ public: void Lower(const MachineInstr *MI, MCInst &OutMI) const; void LowerCPLOAD(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts); void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI); + void LowerUnalignedLoadStore(const MachineInstr *MI, + SmallVector<MCInst, 4>& MCInsts); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; - MCOperand LowerOperand(const MachineOperand& MO) const; + MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const; }; } diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp deleted file mode 100644 index a0a242c..0000000 --- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed 
under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mipsmcsymbolrefexpr" -#include "MipsMCSymbolRefExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -using namespace llvm; - -const MipsMCSymbolRefExpr* -MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol, - int Offset, MCContext &Ctx) { - return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset); -} - -void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: assert(0 && "Invalid kind!"); - case VK_Mips_None: break; - case VK_Mips_GPREL: OS << "%gp_rel("; break; - case VK_Mips_GOT_CALL: OS << "%call16("; break; - case VK_Mips_GOT: OS << "%got("; break; - case VK_Mips_ABS_HI: OS << "%hi("; break; - case VK_Mips_ABS_LO: OS << "%lo("; break; - case VK_Mips_TLSGD: OS << "%tlsgd("; break; - case VK_Mips_GOTTPREL: OS << "%gottprel("; break; - case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; - case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; - case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; - case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; - case VK_Mips_GOT_DISP: OS << "%got_disp("; break; - case VK_Mips_GOT_PAGE: OS << "%got_page("; break; - case VK_Mips_GOT_OFST: OS << "%got_ofst("; break; - } - - OS << *Symbol; - - if (Offset) { - if (Offset > 0) - OS << '+'; - OS << Offset; - } - - if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO) - OS << ")))"; - else if (Kind != VK_Mips_None) - OS << ')'; -} - -bool -MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return false; -} - -void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const { - Asm->getOrCreateSymbolData(*Symbol); -} - -const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const { - return Symbol->isDefined() ? &Symbol->getSection() : NULL; -} - diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h deleted file mode 100644 index 55e85a7..0000000 --- a/lib/Target/Mips/MipsMCSymbolRefExpr.h +++ /dev/null @@ -1,67 +0,0 @@ -//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#ifndef MIPSMCSYMBOLREFEXPR_H -#define MIPSMCSYMBOLREFEXPR_H -#include "llvm/MC/MCExpr.h" - -namespace llvm { - -class MipsMCSymbolRefExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_Mips_None, - VK_Mips_GPREL, - VK_Mips_GOT_CALL, - VK_Mips_GOT, - VK_Mips_ABS_HI, - VK_Mips_ABS_LO, - VK_Mips_TLSGD, - VK_Mips_GOTTPREL, - VK_Mips_TPREL_HI, - VK_Mips_TPREL_LO, - VK_Mips_GPOFF_HI, - VK_Mips_GPOFF_LO, - VK_Mips_GOT_DISP, - VK_Mips_GOT_PAGE, - VK_Mips_GOT_OFST - }; - -private: - const VariantKind Kind; - const MCSymbol *Symbol; - int Offset; - - explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol, - int _Offset) - : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {} - -public: - static const MipsMCSymbolRefExpr *Create(VariantKind Kind, - const MCSymbol *Symbol, int Offset, - MCContext &Ctx); - - void PrintImpl(raw_ostream &OS) const; - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const MipsMCSymbolRefExpr *) { return true; } - - int getOffset() const { return Offset; } - void setOffset(int O) { Offset = O; } -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5331f09..06c4a66 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -285,7 +285,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Mips::SP; + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; else FrameReg = getFrameRegister(MF); @@ -334,8 +334,10 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + bool IsN64 = Subtarget.isABI_N64(); - return TFI->hasFP(MF) ? Mips::FP : Mips::SP; + return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) : + (IsN64 ? Mips::SP_64 : Mips::SP); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 6480da3..5d6b24f 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -36,8 +36,9 @@ MipsTargetMachine:: MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle): - LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, isLittle), DataLayout(isLittle ? (Subtarget.isABI_N64() ? 
@@ -54,31 +55,35 @@ MipsTargetMachine(const Target &T, StringRef TT, MipsebTargetMachine:: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} MipselTargetMachine:: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} Mips64ebTargetMachine:: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) {} Mips64elTargetMachine:: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) : - MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {} + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) : + MipsTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) {} // Install an instruction selector pass using // the ISelDag to gen Mips code. bool MipsTargetMachine:: -addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addInstSelector(PassManagerBase &PM) { PM.add(createMipsISelDag(*this)); return false; @@ -88,14 +93,14 @@ addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) // machine code is emitted. return true if -print-machineinstrs should // print out the code after the passes. bool MipsTargetMachine:: -addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) +addPreEmitPass(PassManagerBase &PM) { PM.add(createMipsDelaySlotFillerPass(*this)); return true; } bool MipsTargetMachine:: -addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +addPreRegAlloc(PassManagerBase &PM) { // Do not restore $gp if target is Mips64. // In N32/64, $gp is a callee-saved register. if (!Subtarget.hasMips64()) @@ -104,14 +109,13 @@ addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { } bool MipsTargetMachine:: -addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { +addPostRegAlloc(PassManagerBase &PM) { PM.add(createMipsExpandPseudoPass(*this)); return true; } bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, - JITCodeEmitter &JCE) { + JITCodeEmitter &JCE) { // Machine code emitter pass for Mips. 
PM.add(createMipsJITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 118ed10..e40d9e2 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -40,6 +40,7 @@ namespace llvm { MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle); virtual const MipsInstrInfo *getInstrInfo() const @@ -67,15 +68,11 @@ namespace llvm { } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addPreRegAlloc(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &); virtual bool addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE); }; @@ -86,7 +83,8 @@ class MipsebTargetMachine : public MipsTargetMachine { public: MipsebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// MipselTargetMachine - Mips32 little endian target machine. @@ -95,7 +93,8 @@ class MipselTargetMachine : public MipsTargetMachine { public: MipselTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// Mips64ebTargetMachine - Mips64 big endian target machine. @@ -104,7 +103,8 @@ class Mips64ebTargetMachine : public MipsTargetMachine { public: Mips64ebTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// Mips64elTargetMachine - Mips64 little endian target machine. 
@@ -113,7 +113,8 @@ class Mips64elTargetMachine : public MipsTargetMachine { public: Mips64elTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // End llvm namespace diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index a5af3b8..09f86b5 100644 --- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -52,9 +52,10 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 50dd417..292ea5e 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -88,8 +88,9 @@ namespace { PTXTargetMachine::PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), @@ -100,39 +101,38 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } -bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createPTXISelDag(*this, OptLevel)); +bool PTXTargetMachine::addInstSelector(PassManagerBase &PM) { + PM.add(createPTXISelDag(*this, getOptLevel())); return false; } -bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM) { // PTXMFInfoExtract must after register allocation! - //PM.add(createPTXMFInfoExtract(*this, OptLevel)); + //PM.add(createPTXMFInfoExtract(*this)); return false; } bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel, bool DisableVerify) { // This is mostly based on LLVMTargetMachine::addPassesToEmitFile // Add common CodeGen passes. 
MCContext *Context = 0; - if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + if (addCommonCodeGenPasses(PM, DisableVerify, Context)) return true; assert(Context != 0 && "Failed to get MCContext"); @@ -192,7 +192,6 @@ bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, } bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, bool DisableVerify, MCContext *&OutContext) { // Add standard LLVM codegen passes. @@ -214,7 +213,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createVerifierPass()); // Run loop strength reduction before anything else. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createLoopStrengthReducePass(getTargetLowering())); //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } @@ -228,12 +227,12 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // The lower invoke pass may create unreachable code. Remove it. PM.add(createUnreachableBlockEliminationPass()); - if (OptLevel != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) PM.add(createCodeGenPreparePass(getTargetLowering())); PM.add(createStackProtectorPass(getTargetLowering())); - addPreISel(PM, OptLevel); + addPreISel(PM); //PM.add(createPrintFunctionPass("\n\n" // "*** Final LLVM Code input to ISel ***\n", @@ -255,10 +254,10 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. - PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + PM.add(new MachineFunctionAnalysis(*this)); // Ask the target for an isel. - if (addInstSelector(PM, OptLevel)) + if (addInstSelector(PM)) return true; // Print the instruction selected machine code... @@ -268,21 +267,21 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createExpandISelPseudosPass()); // Pre-ra tail duplication. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createTailDuplicatePass(true)); printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); } // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. - if (OptLevel != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. PM.add(createLocalStackSlotAllocationPass()); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // With optimization, dead code should already be eliminated. However // there is one known exception: lowered code for arguments that are only // used by tail calls, where the tail calls reuse the incoming stack @@ -300,7 +299,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run pre-ra passes. - if (addPreRegAlloc(PM, OptLevel)) + if (addPreRegAlloc(PM)) printAndVerify(PM, "After PreRegAlloc passes"); // Perform register allocation. @@ -308,7 +307,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { // FIXME: Re-enable coloring with register when it's capable of adding // kill markers. 
PM.add(createStackSlotColoringPass(false)); @@ -322,7 +321,7 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, } // Run post-ra passes. - if (addPostRegAlloc(PM, OptLevel)) + if (addPostRegAlloc(PM)) printAndVerify(PM, "After PostRegAlloc passes"); PM.add(createExpandPostRAPseudosPass()); @@ -333,23 +332,23 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After PrologEpilogCodeInserter"); // Run pre-sched2 passes. - if (addPreSched2(PM, OptLevel)) + if (addPreSched2(PM)) printAndVerify(PM, "After PreSched2 passes"); // Second pass scheduler. - if (OptLevel != CodeGenOpt::None) { - PM.add(createPostRAScheduler(OptLevel)); + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createPostRAScheduler(getOptLevel())); printAndVerify(PM, "After PostRAScheduler"); } // Branch folding must be run after regalloc and prolog/epilog insertion. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); printNoVerify(PM, "After BranchFolding"); } // Tail duplication. - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createTailDuplicatePass(false)); printNoVerify(PM, "After TailDuplicate"); } @@ -359,16 +358,16 @@ bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, //if (PrintGCInfo) // PM.add(createGCInfoPrinter(dbgs())); - if (OptLevel != CodeGenOpt::None) { + if (getOptLevel() != CodeGenOpt::None) { PM.add(createCodePlacementOptPass()); printNoVerify(PM, "After CodePlacementOpt"); } - if (addPreEmitPass(PM, OptLevel)) + if (addPreEmitPass(PM)) printNoVerify(PM, "After PreEmit passes"); - PM.add(createPTXMFInfoExtract(*this, OptLevel)); - PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + PM.add(createPTXMFInfoExtract(*this, getOptLevel())); + PM.add(createPTXFPRoundingModePass(*this, getOptLevel())); return false; } diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 5b7c82b..19f6c0f 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -37,6 +37,7 @@ class PTXTargetMachine : public LLVMTargetMachine { PTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -58,22 +59,18 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &PM); // We override this method to supply our own set of codegen passes. virtual bool addPassesToEmitFile(PassManagerBase &, formatted_raw_ostream &, CodeGenFileType, - CodeGenOpt::Level, bool = true); // Emission of machine code through JITCodeEmitter is not supported. 
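Because PTX keeps a private copy of addPassesToEmitFile, the hunk above has to re-derive every optimization-level decision from the stored level via getOptLevel(). The control flow, boiled down to a toy pipeline builder with placeholder pass names (not the real pass constructors):

#include <iostream>
#include <string>
#include <vector>

namespace CodeGenOpt { enum Level { None, Default }; }

// Toy pipeline builder: the one piece of state the patch relies on is the
// stored level, consulted through getOptLevel() inside the builder itself.
class PipelineSketch {
  CodeGenOpt::Level OptLevel;
  std::vector<std::string> Passes;
public:
  explicit PipelineSketch(CodeGenOpt::Level OL) : OptLevel(OL) {}
  CodeGenOpt::Level getOptLevel() const { return OptLevel; }
  void add(const std::string &P) { Passes.push_back(P); }

  void addCommonCodeGenPasses() {            // note: no Level parameter
    if (getOptLevel() != CodeGenOpt::None)
      add("loop-strength-reduce");           // opt-only, as in the hunk
    add("instruction-selection");            // unconditional
    if (getOptLevel() != CodeGenOpt::None)
      add("branch-folding");                 // opt-only, as in the hunk
  }

  void dump() const {
    for (const std::string &P : Passes)
      std::cout << P << "\n";
  }
};

int main() {
  PipelineSketch P(CodeGenOpt::None);
  P.addCommonCodeGenPasses();
  P.dump();   // only "instruction-selection" survives at -O0
}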
virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, - CodeGenOpt::Level, bool = true) { return true; } @@ -82,14 +79,13 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, - CodeGenOpt::Level, bool = true) { return true; } private: - bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, + bool addCommonCodeGenPasses(PassManagerBase &, bool DisableVerify, MCContext *&OutCtx); }; // class PTXTargetMachine @@ -99,7 +95,8 @@ public: PTX32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { @@ -107,7 +104,8 @@ public: PTX64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; // class PTX32TargetMachine } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index d5c8a9e..7c47051 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -76,7 +76,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { @@ -86,7 +87,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, else RM = Reloc::Static; } - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6f204cc..3dee406 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -18,7 +18,6 @@ #include "MCTargetDesc/PPCPredicates.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index b188b90..36d5c41 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -23,7 +23,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" @@ -408,6 +407,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setInsertFencesForAtomic(true); + setSchedulingPreference(Sched::Hybrid); + computeRegisterProperties(); } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index f148e9d..b9a6297 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" 
#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -57,11 +56,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); if (Directive == PPC::DIR_440) { - // Disable the hazard recognizer for now, as it doesn't support - // bottom-up scheduling. - //const InstrItineraryData *II = TM->getInstrItineraryData(); - //return new PPCHazardRecognizer440(II, DAG); - return new ScheduleHazardRecognizer(); + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new PPCHazardRecognizer440(II, DAG); } else { // Disable the hazard recognizer for now, as it doesn't support @@ -501,8 +497,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); @@ -623,8 +618,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const MachineFrameInfo &MFI = *MF.getFrameInfo(); MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)), + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlignment(FrameIdx)); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2e90b7a..3ba9260 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -273,6 +273,27 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +unsigned +PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const unsigned DefaultSafety = 1; + + switch (RC->getID()) { + default: + return 0; + case PPC::G8RCRegClassID: + case PPC::GPRCRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 32 - FP - DefaultSafety; + } + case PPC::F8RCRegClassID: + case PPC::F4RCRegClassID: + case PPC::VRRCRegClassID: + return 32 - DefaultSafety; + } +} + //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 1cc7213..f70a594 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -37,6 +37,9 @@ public: /// This is used for addressing modes. virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index f5744b83..de8fca0 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -29,8 +29,9 @@ extern "C" void LLVMInitializePowerPCTarget() { PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, is64Bit), DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this), FrameLowering(Subtarget), JITInfo(*this, is64Bit), @@ -44,15 +45,17 @@ bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; } PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) { + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : PPCTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } @@ -60,22 +63,19 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT, // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool PPCTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PPCTargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. PM.add(createPPCISelDag(*this)); return false; } -bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM) { // Must run branch selection immediately preceding the asm printer. PM.add(createPPCBranchSelectionPass()); return false; } bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { // FIXME: This should be moved to TargetJITInfo!! 
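The pattern running through all of these target hooks: CodeGenOpt::Level is no longer threaded through every pass-pipeline method as a parameter; it is handed to the TargetMachine (and its MCCodeGenInfo) once at construction and read back where needed. A hedged sketch of the resulting idiom for a made-up target Foo (createFooISelDag is hypothetical; getOptLevel is the accessor added in lib/Target/TargetMachine.cpp later in this diff):

    // Sketch: a hook that still wants the level pulls it off the machine.
    bool FooTargetMachine::addInstSelector(PassManagerBase &PM) {
      PM.add(createFooISelDag(*this, getOptLevel())); // no OptLevel parameter any more
      return false;
    }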
if (Subtarget.isPPC64()) diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index d06f084..03b27c6 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -42,7 +42,8 @@ class PPCTargetMachine : public LLVMTargetMachine { public: PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM, bool is64Bit); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameLowering *getFrameLowering() const { @@ -66,9 +67,9 @@ public: } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); virtual bool getEnableTailMergeDefault() const; }; @@ -79,7 +80,8 @@ class PPC32TargetMachine : public PPCTargetMachine { public: PPC32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// PPC64TargetMachine - PowerPC 64-bit target machine. @@ -88,7 +90,8 @@ class PPC64TargetMachine : public PPCTargetMachine { public: PPC64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index cb2a7df..eda04c3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -50,9 +50,10 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU, } static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 3d7b4a4..7dff799 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -27,16 +27,16 @@ extern "C" void LLVMInitializeSparcTarget() { SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), DataLayout(Subtarget.getDataLayout()), TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget), FrameLowering(Subtarget) { } -bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool SparcTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createSparcISelDag(*this)); return false; } @@ -44,8 +44,7 @@ bool SparcTargetMachine::addInstSelector(PassManagerBase &PM, /// addPreEmitPass - This pass may be implemented by targets that want to run /// passes immediately before machine code 
is emitted. This should return /// true if -print-machineinstrs should print out the code after the passes. -bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel){ +bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM){ PM.add(createSparcFPMoverPass(*this)); PM.add(createSparcDelaySlotFillerPass(*this)); return true; @@ -54,13 +53,15 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM, SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, - CodeModel::Model CM) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) { + CodeModel::Model CM, + CodeGenOpt::Level OL) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, false) { } SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, - CodeModel::Model CM) - : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) { + CodeModel::Model CM, + CodeGenOpt::Level OL) + : SparcTargetMachine(T, TT, CPU, FS, RM, CM, OL, true) { } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 3c907dd..63bfa5d 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -35,7 +35,8 @@ class SparcTargetMachine : public LLVMTargetMachine { public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM, bool is64bit); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit); virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const TargetFrameLowering *getFrameLowering() const { @@ -54,8 +55,8 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); }; /// SparcV8TargetMachine - Sparc 32-bit target machine @@ -64,7 +65,8 @@ class SparcV8TargetMachine : public SparcTargetMachine { public: SparcV8TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// SparcV9TargetMachine - Sparc 64-bit target machine @@ -73,7 +75,8 @@ class SparcV9TargetMachine : public SparcTargetMachine { public: SparcV9TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; } // end namespace llvm diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 709dfd2..aa2e014 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -20,6 +20,19 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo", "Target Library Information", false, true) char TargetLibraryInfo::ID = 0; +const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = + { + "memset", + "memcpy", + "memmove", + "memset_pattern16", + "iprintf", + "siprintf", + "fiprintf", + "fwrite", + "fputs" + }; + /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. 
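The StandardNames table above pairs each LibFunc enum with its default symbol name, and initialize() below can override availability or the name per triple. A hedged sketch of the consumer side (a getName accessor resolving custom names is assumed from the matching header change, which this diff does not show):

    // Sketch: ask whether fwrite may be emitted, and under which symbol.
    if (TLI->has(LibFunc::fwrite)) {
      StringRef Name = TLI->getName(LibFunc::fwrite);
      // Usually "fwrite"; "fwrite$UNIX2003" on recent 32-bit OS X, per the
      // initialize() hunk that follows.
    }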
@@ -38,6 +51,17 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { TLI.setUnavailable(LibFunc::memset_pattern16); } + if (T.isMacOSX() && T.getArch() == Triple::x86 && + !T.isMacOSXVersionLT(10, 7)) { + // x86-32 OSX has a scheme where fwrite and fputs (and some other functions + // we don't care about) have two versions; on recent OSX, the one we want + // has a $UNIX2003 suffix. The two implementations are identical except + // for the return value in some edge cases. However, we don't want to + // generate code that depends on the old symbols. + TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003"); + TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003"); + } + // iprintf and friends are only available on XCore and TCE. if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) { TLI.setUnavailable(LibFunc::iprintf); @@ -64,6 +88,7 @@ TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) : ImmutablePass(ID) { memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); + CustomNames = TLI.CustomNames; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index daac924..805e16e 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -226,6 +226,14 @@ CodeModel::Model TargetMachine::getCodeModel() const { return CodeGenInfo->getCodeModel(); } +/// getOptLevel - Returns the optimization level: None, Less, +/// Default, or Aggressive. +CodeGenOpt::Level TargetMachine::getOptLevel() const { + if (!CodeGenInfo) + return CodeGenOpt::Default; + return CodeGenInfo->getOptLevel(); +} + bool TargetMachine::getAsmVerbosityDefault() { return AsmVerbosityDefault; } diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 8d85b95..6e87efa 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -34,6 +34,12 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, switch (MI->getOpcode()) { case X86::INSERTPSrr: + Src1Name = getRegName(MI->getOperand(0).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); + break; + case X86::VINSERTPSrr: + DestName = getRegName(MI->getOperand(0).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); Src2Name = getRegName(MI->getOperand(2).getReg()); DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask); @@ -44,34 +50,52 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVLHPSMask(2, ShuffleMask); break; + case X86::VMOVLHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVLHPSMask(2, ShuffleMask); + break; case X86::MOVHLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(0).getReg()); DecodeMOVHLPSMask(2, ShuffleMask); break; + case X86::VMOVHLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeMOVHLPSMask(2, ShuffleMask); + break; case X86::PSHUFDri: + case X86::VPSHUFDri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
case X86::PSHUFDmi: + case X86::VPSHUFDmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFHWri: + case X86::VPSHUFHWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFHWmi: + case X86::VPSHUFHWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); break; case X86::PSHUFLWri: + case X86::VPSHUFLWri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFLWmi: + case X86::VPSHUFLWmi: DestName = getRegName(MI->getOperand(0).getReg()); DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(), ShuffleMask); @@ -142,6 +166,14 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPDrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPDrmi: + DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::SHUFPSrri: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -150,63 +182,107 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VSHUFPSrri: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VSHUFPSrmi: + DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::UNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDrm: - DecodeUNPCKLPDMask(2, ShuffleMask); + DecodeUNPCKLPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPDYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPDYrm: - DecodeUNPCKLPDMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::VUNPCKLPSrm: - DecodeUNPCKLPSMask(4, ShuffleMask); + DecodeUNPCKLPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::VUNPCKLPSYrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
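These V-prefixed cases extend the shuffle-comment printer to the AVX encodings, so the decoded mask shows up next to the instruction in assembly listings. Illustrative output only (the comment marker and register choice depend on platform and allocation):

    vpshufd $27, %xmm0, %xmm1    ## xmm1 = xmm0[3,2,1,0]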
case X86::VUNPCKLPSYrm: - DecodeUNPCKLPSMask(8, ShuffleMask); + DecodeUNPCKLPMask(MVT::v8f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPDrm: - DecodeUNPCKHPMask(2, ShuffleMask); + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPDrm: + DecodeUNPCKHPMask(MVT::v2f64, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VUNPCKHPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPDYrm: + DecodeUNPCKHPMask(MVT::v4f64, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::UNPCKHPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKHPSrm: - DecodeUNPCKHPMask(4, ShuffleMask); + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKHPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSrm: + DecodeUNPCKHPMask(MVT::v4f32, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; + case X86::VUNPCKHPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKHPSYrm: + DecodeUNPCKHPMask(MVT::v8f32, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + break; case X86::VPERMILPSri: DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(), ShuffleMask); diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 03c3948..a843515 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -385,7 +385,8 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); Triple T(TT); @@ -429,7 +430,7 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, // 64-bit JIT places everything in the same buffer except external funcs. CM = is64Bit ?
CodeModel::Large : CodeModel::Small; - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index aeb3309..f6c9d7b 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -142,29 +142,29 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, } } -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i+NElts/2); // Reads from dest - ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src - } -} +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); -} + // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate + // independently on 128-bit lanes. + unsigned NumLanes = VT.getSizeInBits() / 128; + if (NumLanes == 0 ) NumLanes = 1; // Handle MMX + unsigned NumLaneElts = NumElts / NumLanes; -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask) { - DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); + for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts + NumLaneElts/2; + unsigned End = s * NumLaneElts + NumLaneElts; + for (unsigned i = Start; i != End; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 + } + } } /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask) { +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { unsigned NumElts = VT.getVectorNumElements(); // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -173,16 +173,13 @@ void DecodeUNPCKLPMask(EVT VT, if (NumLanes == 0 ) NumLanes = 1; // Handle MMX unsigned NumLaneElts = NumElts / NumLanes; - unsigned Start = 0; - unsigned End = NumLaneElts / 2; for (unsigned s = 0; s < NumLanes; ++s) { + unsigned Start = s * NumLaneElts; + unsigned End = s * NumLaneElts + NumLaneElts/2; for (unsigned i = Start; i != End; ++i) { - ShuffleMask.push_back(i); // Reads from dest/src1 - ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2 + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumElts); // Reads from src/src2 } - // Process the next 128 bits. - Start += NumLaneElts; - End += NumLaneElts; } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 58193e6..35f6530 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -67,20 +67,15 @@ void DecodePUNPCKHMask(unsigned NElts, void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask); -void DecodeUNPCKHPMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeUNPCKLPSMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); - -void DecodeUNPCKLPDMask(unsigned NElts, - SmallVectorImpl<unsigned> &ShuffleMask); +/// DecodeUNPCKHPMask - This decodes the shuffle masks for unpckhps/unpckhpd +/// etc. 
VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd /// etc. VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. -void DecodeUNPCKLPMask(EVT VT, - SmallVectorImpl<unsigned> &ShuffleMask); +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask); // DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3d75de0..3c35763 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2216,6 +2216,75 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } break; } + case ISD::STORE: { + // The DEC64m tablegen pattern is currently not able to match the case where + // the EFLAGS on the original DEC are used. + // We'll need to improve tablegen to allow flags to be transferred from a + // node in the pattern to the result node, probably with a new keyword. + // For example, we have this: + // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + // [(store (add (loadi64 addr:$dst), -1), addr:$dst), + // (implicit EFLAGS)]>; + // but may need something like this: + // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", + // [(store (add (loadi64 addr:$dst), -1), addr:$dst), + // (transferrable EFLAGS)]>; + StoreSDNode *StoreNode = cast<StoreSDNode>(Node); + SDValue Chain = StoreNode->getOperand(0); + SDValue StoredVal = StoreNode->getOperand(1); + SDValue Address = StoreNode->getOperand(2); + SDValue Undef = StoreNode->getOperand(3); + + if (StoreNode->getMemOperand()->getSize() != 8 || + Undef->getOpcode() != ISD::UNDEF || + Chain->getOpcode() != ISD::LOAD || + StoredVal->getOpcode() != X86ISD::DEC || + StoredVal.getResNo() != 0 || + StoredVal->getOperand(0).getNode() != Chain.getNode()) + break; + + //OPC_CheckPredicate, 1, // Predicate_nontemporalstore + if (StoreNode->isNonTemporal()) + break; + + LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); + if (LoadNode->getOperand(1) != Address || + LoadNode->getOperand(2) != Undef) + break; + + if (!ISD::isNormalLoad(LoadNode)) + break; + + if (!ISD::isNormalStore(StoreNode)) + break; + + // Check that the load chain has only one use (from the store). + if (!Chain.hasOneUse()) + break; + + // Merge the input chains if they are not intra-pattern references.
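In source terms, the ISD::STORE case above targets a read-modify-write decrement whose flags are then consumed, the case the plain tablegen pattern cannot express. A hedged illustration (the named instructions are the expected selection, not a guarantee from this patch alone):

    // Sketch: with the peephole, load/DEC/store plus the flags use collapse to
    // roughly "decq (%rdi); sete %al" instead of a separate dec and cmp.
    bool decAndTest(long long *p) {
      return --*p == 0;
    }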
+ SDValue InputChain = LoadNode->getOperand(0); + + SDValue Base, Scale, Index, Disp, Segment; + if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), + Base, Scale, Index, Disp, Segment)) + break; + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2); + MemOp[0] = StoreNode->getMemOperand(); + MemOp[1] = LoadNode->getMemOperand(); + const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; + MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, + Node->getDebugLoc(), + MVT::i32, MVT::Other, Ops, + array_lengthof(Ops)); + Result->setMemRefs(MemOp, MemOp + 2); + + ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); + + return Result; + } } SDNode *ResNode = SelectCode(Node); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4e11131..96c6f41 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -35,7 +35,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -909,7 +908,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); @@ -981,7 +980,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42() || Subtarget->hasAVX()) + if (Subtarget->hasSSE42orAVX()) setOperationAction(ISD::SETCC, MVT::v2i64, Custom); if (!UseSoftFloat && Subtarget->hasAVX()) { @@ -2846,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -2927,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -3416,6 +3407,41 @@ static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) { return Mask; } +/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming +/// the two vector operands have swapped position. 
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { + unsigned NumElems = VT.getVectorNumElements(); + for (unsigned i = 0; i != NumElems; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElems) + Mask[i] = idx + NumElems; + else + Mask[i] = idx - NumElems; + } +} + +/// isCommutedVSHUFPMask - Return true if swapping operands will +/// allow us to use the "vshufpd" or "vshufps" instruction +/// for 256-bit vectors. +static bool isCommutedVSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT, + const X86Subtarget *Subtarget) { + + unsigned NumElems = VT.getVectorNumElements(); + if ((VT.getSizeInBits() != 256) || ((NumElems != 4) && (NumElems != 8))) + return false; + + SmallVector<int, 8> CommutedMask; + for (unsigned i = 0; i < NumElems; ++i) + CommutedMask.push_back(Mask[i]); + + CommuteVectorShuffleMask(CommutedMask, VT); + return (NumElems == 4) ? isVSHUFPDYMask(CommutedMask, VT, Subtarget): + isVSHUFPSYMask(CommutedMask, VT, Subtarget); +} + + /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to 128-bit /// SHUFPS and SHUFPD. @@ -3551,13 +3577,14 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate @@ -3591,22 +3618,23 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKLMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, - bool V2IsSplat = false) { + bool HasAVX2, bool V2IsSplat = false) { int NumElts = VT.getVectorNumElements(); assert((VT.is128BitVector() || VT.is256BitVector()) && "Unsupported vector type for unpckh"); - if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8) + if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 && + (!HasAVX2 || (NumElts != 16 && NumElts != 32))) return false; // Handle 128 and 256-bit vector lengths.
AVX defines UNPCK* to operate @@ -3638,10 +3666,10 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT, return true; } -bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) { +bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, bool V2IsSplat) { SmallVector<int, 8> M; N->getMask(M); - return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat); + return ::isUNPCKHMask(M, N->getValueType(0), HasAVX2, V2IsSplat); } /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form @@ -3953,7 +3981,7 @@ static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false, /// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7> bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -3981,7 +4009,7 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N, /// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6> bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget) { - if (!Subtarget->hasSSE3() && !Subtarget->hasAVX()) + if (!Subtarget->hasSSE3orAVX()) return false; // The second vector must be undef @@ -4216,21 +4244,6 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, SVOp->getOperand(0), &MaskVec[0]); } -/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming -/// the two vector operands have swapped position. -static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - for (unsigned i = 0; i != NumElems; ++i) { - int idx = Mask[i]; - if (idx < 0) - continue; - else if (idx < (int)NumElems) - Mask[i] = idx + NumElems; - else - Mask[i] = idx - NumElems; - } -} - /// ShouldXformToMOVHLPS - Return true if the node should be transformed to /// match movhlps. The lower half elements should come from upper half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4388,23 +4401,30 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG, } /// getOnesVector - Returns a vector of specified type with all bits set. -/// Always build ones vectors as <4 x i32>. For 256-bit types, use two -/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their -/// original type, ensuring they get CSE'd. +/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with +/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately. +/// Then bitcast to their original type, ensuring they get CSE'd.
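The lane-wise rewrite of DecodeUNPCKHPMask (in X86ShuffleDecode.cpp above) is easy to sanity-check by hand. A self-contained sketch reproducing its loops for v8f32, a 256-bit vector with two 128-bit lanes of four floats each:

    #include <cstdio>
    int main() {
      const unsigned NumElts = 8, NumLanes = 2;          // v8f32
      const unsigned NumLaneElts = NumElts / NumLanes;
      for (unsigned s = 0; s != NumLanes; ++s) {
        unsigned Start = s * NumLaneElts + NumLaneElts / 2;
        unsigned End = s * NumLaneElts + NumLaneElts;
        for (unsigned i = Start; i != End; ++i)
          printf("%u %u ", i, i + NumElts);              // src1 elt, then src2 elt
      }
      printf("\n");  // prints "2 10 3 11 6 14 7 15": high halves, interleaved per lane
      return 0;
    }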
+static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG, + DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); assert((VT.is128BitVector() || VT.is256BitVector()) && "Expected a 128-bit or 256-bit vector type"); SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, - Cst, Cst, Cst, Cst); - - if (VT.is256BitVector()) { - SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), - Vec, DAG.getConstant(0, MVT::i32), DAG, dl); - Vec = Insert128BitVector(InsV, Vec, - DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + SDValue Vec; + if (VT.getSizeInBits() == 256) { + if (HasAVX2) { // AVX2 + SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8); + } else { // AVX + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); + SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32), + Vec, DAG.getConstant(0, MVT::i32), DAG, dl); + Vec = Insert128BitVector(InsV, Vec, + DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl); + } + } else { + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); } return DAG.getNode(ISD::BITCAST, dl, VT, Vec); @@ -4623,9 +4643,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: - DecodeUNPCKHPMask(NumElems, ShuffleMask); + DecodeUNPCKHPMask(VT, ShuffleMask); break; case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: @@ -4635,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: @@ -5111,6 +5127,97 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, return SDValue(); } +/// isVectorBroadcast - Check if the node chain is suitable to be xformed to +/// a vbroadcast node. We support two patterns: +/// 1. A splat BUILD_VECTOR which uses a single scalar load. +/// 2. A splat shuffle which uses a scalar_to_vector node which comes from +/// a scalar load. +/// The scalar load node is returned when a pattern is found, +/// or SDValue() otherwise. +static SDValue isVectorBroadcast(SDValue &Op, bool hasAVX2) { + EVT VT = Op.getValueType(); + SDValue V = Op; + + if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + //A suspected load to be broadcasted. + SDValue Ld; + + switch (V.getOpcode()) { + default: + // Unknown pattern found. + return SDValue(); + + case ISD::BUILD_VECTOR: { + // The BUILD_VECTOR node must be a splat. + if (!isSplatVector(V.getNode())) + return SDValue(); + + Ld = V.getOperand(0); + + // The suspected load node has several users. Make sure that all + // of its users are from the BUILD_VECTOR node. + if (!Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + return SDValue(); + break; + } + + case ISD::VECTOR_SHUFFLE: { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + + // Shuffles must have a splat mask where the first element is + // broadcasted. 
+ if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0) + return SDValue(); + + SDValue Sc = Op.getOperand(0); + if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR) + return SDValue(); + + Ld = Sc.getOperand(0); + + // The scalar_to_vector node and the suspected + // load node must have exactly one user. + if (!Sc.hasOneUse() || !Ld.hasOneUse()) + return SDValue(); + break; + } + } + + // The scalar source must be a normal load. + if (!ISD::isNormalLoad(Ld.getNode())) + return SDValue(); + + bool Is256 = VT.getSizeInBits() == 256; + bool Is128 = VT.getSizeInBits() == 128; + unsigned ScalarSize = Ld.getValueType().getSizeInBits(); + + if (hasAVX2) { + // VBroadcast to YMM + if (Is256 && (ScalarSize == 8 || ScalarSize == 16 || + ScalarSize == 32 || ScalarSize == 64 )) + return Ld; + + // VBroadcast to XMM + if (Is128 && (ScalarSize == 8 || ScalarSize == 32 || + ScalarSize == 16 || ScalarSize == 64 )) + return Ld; + } + + // VBroadcast to YMM + if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) + return Ld; + + // VBroadcast to XMM + if (Is128 && (ScalarSize == 32)) + return Ld; + + + // Unsupported broadcast. + return SDValue(); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5131,14 +5238,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { } // Vectors containing all ones can be matched by pcmpeqd on 128-bit width - // vectors or broken into v4i32 operations on 256-bit vectors. + // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use + // vpcmpeqd on 256-bit vectors. if (ISD::isBuildVectorAllOnes(Op.getNode())) { - if (Op.getValueType() == MVT::v4i32) + if (Op.getValueType() == MVT::v4i32 || + (Op.getValueType() == MVT::v8i32 && Subtarget->hasAVX2())) return Op; - return getOnesVector(Op.getValueType(), DAG, dl); + return getOnesVector(Op.getValueType(), Subtarget->hasAVX2(), DAG, dl); } + SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); + if (Subtarget->hasAVX() && LD.getNode()) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -5380,7 +5493,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return LD; // For SSE 4.1, use insertps to put the high elements into the low element. - if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) { + if (getSubtarget()->hasSSE41orAVX()) { SDValue Result; if (Op.getOperand(0).getOpcode() != ISD::UNDEF) Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); @@ -5551,7 +5664,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // quads, disable the next transformation since it does not help SSSE3. bool V1Used = InputQuads[0] || InputQuads[1]; bool V2Used = InputQuads[2] || InputQuads[3]; - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { if (InputQuads.count() == 2 && V1Used && V2Used) { BestLoQuad = InputQuads.find_first(); BestHiQuad = InputQuads.find_next(BestLoQuad); @@ -5624,7 +5737,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, // If we have SSSE3, and all words of the result are from 1 input vector, // case 2 is generated, otherwise case 3 is generated. If no SSSE3 // is present, fall back to case 4. 
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { + if (Subtarget->hasSSSE3orAVX()) { SmallVector<SDValue,16> pshufbMask; // If we have elements from both input vectors, set the high bit of the @@ -5692,8 +5805,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFLWImmediate(NewV.getNode()), @@ -5721,8 +5833,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && - (Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3orAVX()) NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, NewV.getOperand(0), X86::getShufflePSHUFHWImmediate(NewV.getNode()), @@ -5788,7 +5899,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } // If SSSE3, use 1 pshufb instruction per vector with elements in the result. - if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) { + if (TLI.getSubtarget()->hasSSSE3orAVX()) { SmallVector<SDValue,16> pshufbMask; // If all result elements are from one input vector, then only translate @@ -6455,17 +6566,23 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; + case MVT::v8i32: + if (HasAVX2) return X86ISD::PUNPCKLDQ; + // else use fp unit for int unpack. + case MVT::v8f32: case MVT::v4f32: return X86ISD::UNPCKLPS; + case MVT::v4i64: + if (HasAVX2) return X86ISD::PUNPCKLQDQ; + // else use fp unit for int unpack. + case MVT::v4f64: case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v8i32: // Use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKLPSY; - case MVT::v4i64: // Use fp unit for int unpack. - case MVT::v4f64: return X86ISD::VUNPCKLPDY; + case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKLBW; + case MVT::v16i16: case MVT::v8i16: return X86ISD::PUNPCKLWD; default: llvm_unreachable("Unknown type for unpckl"); @@ -6473,17 +6590,23 @@ static inline unsigned getUNPCKLOpcode(EVT VT) { return 0; } -static inline unsigned getUNPCKHOpcode(EVT VT) { +static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKHDQ; case MVT::v2i64: return X86ISD::PUNPCKHQDQ; + case MVT::v8i32: + if (HasAVX2) return X86ISD::PUNPCKHDQ; + // else use fp unit for int unpack. + case MVT::v8f32: case MVT::v4f32: return X86ISD::UNPCKHPS; + case MVT::v4i64: + if (HasAVX2) return X86ISD::PUNPCKHQDQ; + // else use fp unit for int unpack. + case MVT::v4f64: case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v8i32: // Use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKHPSY; - case MVT::v4i64: // Use fp unit for int unpack. 
- case MVT::v4f64: return X86ISD::VUNPCKHPDY; + case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKHBW; + case MVT::v16i16: case MVT::v8i16: return X86ISD::PUNPCKHWD; default: llvm_unreachable("Unknown type for unpckh"); @@ -6507,52 +6630,6 @@ static inline unsigned getVPERMILOpcode(EVT VT) { return 0; } -/// isVectorBroadcast - Check if the node chain is suitable to be xformed to -/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming -/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded. -static bool isVectorBroadcast(SDValue &Op) { - EVT VT = Op.getValueType(); - bool Is256 = VT.getSizeInBits() == 256; - - assert((VT.getSizeInBits() == 128 || Is256) && - "Unsupported type for vbroadcast node"); - - SDValue V = Op; - if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - - if (Is256 && !(V.hasOneUse() && - V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(0).getOpcode() == ISD::UNDEF)) - return false; - - if (Is256) - V = V.getOperand(1); - - if (!V.hasOneUse()) - return false; - - // Check the source scalar_to_vector type. 256-bit broadcasts are - // supported for 32/64-bit sizes, while 128-bit ones are only supported - // for 32-bit scalars. - if (V.getOpcode() != ISD::SCALAR_TO_VECTOR) - return false; - - unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits(); - if (ScalarSize != 32 && ScalarSize != 64) - return false; - if (!Is256 && ScalarSize == 64) - return false; - - V = V.getOperand(0); - if (!MayFoldLoad(V)) - return false; - - // Return the load node - Op = V; - return true; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6578,8 +6655,9 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, return Op; // Use vbroadcast whenever the splat comes from a foldable load - if (Subtarget->hasAVX() && isVectorBroadcast(V1)) - return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1); + SDValue LD = isVectorBroadcast(Op, Subtarget->hasAVX2()); + if (Subtarget->hasAVX() && LD.getNode()) + return DAG.getNode(X86ISD::VBROADCAST, dl, VT, LD); // Handle splats by matching through known shuffle masks if ((Size == 128 && NumElem <= 4) || @@ -6630,6 +6708,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool V1IsSplat = false; bool V2IsSplat = false; bool HasXMMInt = Subtarget->hasXMMInt(); + bool HasAVX2 = Subtarget->hasAVX2(); MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); @@ -6659,12 +6738,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and // unpckh_undef). Only use pshufd if speed is more important than size. 
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); - if (X86::isMOVDDUPMask(SVOp) && - (Subtarget->hasSSE3() || Subtarget->hasAVX()) && + if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() && V2IsUndef && RelaxedMayFoldVectorLoad(V1)) return getMOVDDup(Op, dl, V1, DAG); @@ -6672,9 +6752,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVHighToLow(Op, dl, DAG); // Use to match splats - if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef && + if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef && (VT == MVT::v2f64 || VT == MVT::v2i64)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isPSHUFDMask(SVOp)) { // The actual implementation will match the mask in the if above and then @@ -6696,8 +6777,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool isLeft = false; unsigned ShAmt = 0; SDValue ShVal; - bool isShift = getSubtarget()->hasXMMInt() && - isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); + bool isShift = HasXMMInt && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt); if (isShift && ShVal.hasOneUse()) { // If the shifted value has multiple uses, it may be cheaper to use // v_set0 + movlhps or movhlps, etc. @@ -6721,7 +6801,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } // FIXME: fold these into legal mask. 
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp)) + if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp, HasAVX2)) return getMOVLowToHigh(Op, dl, DAG, HasXMMInt); if (X86::isMOVHLPSMask(SVOp)) @@ -6774,11 +6854,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getMOVL(DAG, dl, VT, V2, V1); } - if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKLMask(SVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); - if (X86::isUNPCKHMask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); + if (X86::isUNPCKHMask(SVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2, + DAG); if (V2IsSplat) { // Normalize mask so all entries that point to V2 points to its first @@ -6787,9 +6869,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewMask = NormalizeMask(SVOp, DAG); ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask); if (NSVOp != SVOp) { - if (X86::isUNPCKLMask(NSVOp, true)) { + if (X86::isUNPCKLMask(NSVOp, HasAVX2, true)) { return NewMask; - } else if (X86::isUNPCKHMask(NSVOp, true)) { + } else if (X86::isUNPCKHMask(NSVOp, HasAVX2, true)) { return NewMask; } } @@ -6801,11 +6883,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDValue NewOp = CommuteVectorShuffle(SVOp, DAG); ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp); - if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKLMask(NewSVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); - if (X86::isUNPCKHMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); + if (X86::isUNPCKHMask(NewSVOp, HasAVX2)) + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1, + DAG); } // Normalize the node to match x86 shuffle ops if needed @@ -6818,7 +6902,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SmallVector<int, 16> M; SVOp->getMask(M); - if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX())) + if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX())) return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2, X86::getShufflePALIGNRImmediate(SVOp), DAG); @@ -6846,9 +6930,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { X86::getShuffleSHUFImmediate(SVOp), DAG); if (X86::isUNPCKL_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) - return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1, + DAG); //===--------------------------------------------------------------------===// // Generate target specific nodes for 128 or 256-bit shuffles only @@ -6884,6 +6970,17 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, getShuffleVSHUFPDYImmediate(SVOp), DAG); + // Try to swap operands in the node to match x86 shuffle ops + if (isCommutedVSHUFPMask(M, VT, Subtarget)) { + // Now we need to commute operands. 
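Commuting is just a relabeling of which operand each mask entry selects from; CommuteVectorShuffleMask (moved earlier in this file) shifts every non-negative entry by NumElems in the appropriate direction. A tiny standalone check for a four-element mask:

    #include <cstdio>
    int main() {
      int Mask[4] = {0, 5, 2, 7};                  // V1[0], V2[1], V1[2], V2[3]
      for (int i = 0; i != 4; ++i)
        if (Mask[i] >= 0)                          // negative entries stay undef
          Mask[i] = Mask[i] < 4 ? Mask[i] + 4 : Mask[i] - 4;
      for (int i = 0; i != 4; ++i)
        printf("%d ", Mask[i]);                    // prints "4 1 6 3"
      printf("\n");
      return 0;
    }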
+ SVOp = cast<ShuffleVectorSDNode>(CommuteVectorShuffle(SVOp, DAG)); + V1 = SVOp->getOperand(0); + V2 = SVOp->getOperand(1); + unsigned Immediate = (NumElems == 4) ? getShuffleVSHUFPDYImmediate(SVOp): + getShuffleVSHUFPSYImmediate(SVOp); + return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2, Immediate, DAG); + } + //===--------------------------------------------------------------------===// // Since no target specific shuffle was selected for this generic one, // lower it into other known shuffles. FIXME: this isn't true yet, but @@ -7002,7 +7099,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) { + if (Subtarget->hasSSE41orAVX()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); if (Res.getNode()) return Res; @@ -7144,7 +7241,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { return Insert128BitVector(N0, V, Ins128Idx, DAG, dl); } - if (Subtarget->hasSSE41() || Subtarget->hasAVX()) + if (Subtarget->hasSSE41orAVX()) return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG); if (EltVT == MVT::i8) @@ -8264,8 +8361,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, // climbing the DAG back to the root, and it doesn't seem to be worth the // effort. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && + UI->getOpcode() != ISD::SETCC && + UI->getOpcode() != ISD::STORE) goto default_case; if (ConstantSDNode *C = @@ -8408,11 +8507,19 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, } } else if (Op1.getOpcode() == ISD::Constant) { ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + uint64_t AndRHSVal = AndRHS->getZExtValue(); SDValue AndLHS = Op0; - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + + if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); } + + // Use BT if the immediate can't be encoded in a TEST instruction. + if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) { + LHS = AndLHS; + RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType()); + } } if (LHS.getNode()) { @@ -8632,9 +8739,9 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). 
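The LowerToBT change above closes a 64-bit encoding gap: TEST only accepts a sign-extended 32-bit immediate, so testing a single bit above bit 31 used to require materializing the mask in a register first. When the AND mask is a power of two outside that range, its bit index can feed BT directly. A hedged source-level example (the selection shown is the expected outcome, not verified output):

    // Sketch: expected to select roughly "btq $40, %rdi; setb %al".
    bool bit40(unsigned long long X) {
      return (X & (1ULL << 40)) != 0;
    }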
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42orAVX()) return SDValue(); - if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX()) + if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41orAVX()) return SDValue(); // Since SSE has no unsigned integer comparisons, we need to flip the sign @@ -9464,6 +9571,23 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_hsub_pd_256: return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + return DAG.getNode(ISD::SHL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + return DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + return DAG.getNode(ISD::SRA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // ptest and testp intrinsics. The intrinsic these come from are designed to // return an integer value, not just an instruction so lower it to the ptest // or testp pattern and a setcc for the result. @@ -10261,47 +10385,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return Res; } - if (Subtarget->hasAVX2()) { - if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); - - if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), - R, DAG.getConstant(ShiftAmt, MVT::i32)); + if (Subtarget->hasAVX2() && VT == MVT::v32i8) { + if (Op.getOpcode() == ISD::SHL) { + // Make a large shift. 
+ SDValue SHL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the rightmost bits. + SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt), + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SHL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); + } + if (Op.getOpcode() == ISD::SRL) { + // Make a large shift. + SDValue SRL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the leftmost bits. + SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SRL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); + } + if (Op.getOpcode() == ISD::SRA) { + if (ShiftAmt == 7) { + // R s>> 7 === R s< 0 + SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); + return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R); + } + + // R s>> a === ((R u>> a) ^ m) - m + SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt, + MVT::i8)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32); + Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); + Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); + return Res; } + } } } @@ -10493,9 +10618,9 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{ DebugLoc dl = Op.getDebugLoc(); - SDNode* Node = Op.getNode(); - EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - EVT VT = Node->getValueType(0); + EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT VT = Op.getValueType(); + if (Subtarget->hasXMMInt() && VT.isVector()) { unsigned BitsDiff = VT.getScalarType().getSizeInBits() - ExtraVT.getScalarType().getSizeInBits(); @@ -10506,21 +10631,55 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) switch (VT.getSimpleVT().SimpleTy) { default: return SDValue(); - case MVT::v4i32: { + case MVT::v4i32: SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d; SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d; break; - } - case MVT::v8i16: { + case MVT::v8i16: SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w; SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w; break; - } + case MVT::v8i32: + case MVT::v16i16: + if (!Subtarget->hasAVX()) + return SDValue(); + if (!Subtarget->hasAVX2()) { + // Needs to be split into two 128-bit halves. + int NumElems = VT.getVectorNumElements(); + SDValue Idx0 = DAG.getConstant(0, MVT::i32); + SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32); + + // Extract the LHS vectors + SDValue LHS = Op.getOperand(0); + SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl); + SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl); + + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + EVT ExtraEltVT = ExtraVT.getVectorElementType(); + int ExtraNumElems = ExtraVT.getVectorNumElements(); + ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT, + ExtraNumElems/2); + SDValue Extra = DAG.getValueType(ExtraVT); + + LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra); + LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2); + } + if (VT == MVT::v8i32) { + SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d; +
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d; + } else { + SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w; + SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w; + } } SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(SHLIntrinsicsID, MVT::i32), - Node->getOperand(0), ShAmt); + Op.getOperand(0), ShAmt); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(SRAIntrinsicsID, MVT::i32), @@ -11033,9 +11192,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PINSRW: return "X86ISD::PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::ANDNP: return "X86ISD::ANDNP"; - case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; - case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; - case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::PSIGN: return "X86ISD::PSIGN"; case X86ISD::BLENDV: return "X86ISD::BLENDV"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; @@ -11111,7 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11235,7 +11391,7 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()); + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()); // FIXME: pshufb, blends, shifts. 
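The v32i8 constant-shift lowering a few hunks up deserves spelling out: AVX2 has no per-byte shift, so the code shifts 16-bit lanes (vpsllw/vpsrlw) and then masks away the bits that crossed byte boundaries; for arithmetic shifts it uses the identity R s>> a == ((R u>> a) ^ m) - m with a per-byte mask m = 0x80 >> a, plus a pcmpgtb-against-zero special case for a == 7. A scalar model that checks the identity over all byte values (illustrative code, not from the patch):

  #include <cassert>
  #include <cstdint>

  int8_t sra_emulated(uint8_t x, unsigned a) {
    uint8_t m = uint8_t(0x80u >> a);     // sign-bit position after the logical shift
    uint8_t u = uint8_t(x >> a);         // logical shift: vpsrlw plus byte mask above
    return int8_t(uint8_t((u ^ m) - m)); // xor/sub sign-extends from bit (7 - a)
  }

  int main() {
    for (int v = -128; v < 128; ++v)
      for (unsigned a = 0; a < 8; ++a)
        assert(sra_emulated(uint8_t(v), a) == int8_t(v >> a));
    return 0;
  }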
return (VT.getVectorNumElements() == 2 || @@ -11245,9 +11401,9 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isPSHUFDMask(M, VT) || isPSHUFHWMask(M, VT) || isPSHUFLWMask(M, VT) || - isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) || - isUNPCKLMask(M, VT) || - isUNPCKHMask(M, VT) || + isPALIGNRMask(M, VT, Subtarget->hasSSSE3orAVX()) || + isUNPCKLMask(M, VT, Subtarget->hasAVX2()) || + isUNPCKHMask(M, VT, Subtarget->hasAVX2()) || isUNPCKL_v_undef_Mask(M, VT) || isUNPCKH_v_undef_Mask(M, VT)); } @@ -11654,7 +11810,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) && + assert(Subtarget->hasSSE42orAVX() && "Target must have SSE4.2 or AVX features enabled"); DebugLoc dl = MI->getDebugLoc(); @@ -13808,98 +13964,98 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return R; EVT VT = N->getValueType(0); - if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64) - return SDValue(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // look for psign/blend - if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) { - if (VT == MVT::v2i64) { - // Canonicalize pandn to RHS - if (N0.getOpcode() == X86ISD::ANDNP) - std::swap(N0, N1); - // or (and (m, x), (pandn m, y)) - if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { - SDValue Mask = N1.getOperand(0); - SDValue X = N1.getOperand(1); - SDValue Y; - if (N0.getOperand(0) == Mask) - Y = N0.getOperand(1); - if (N0.getOperand(1) == Mask) - Y = N0.getOperand(0); - - // Check to see if the mask appeared in both the AND and ANDNP and - if (!Y.getNode()) - return SDValue(); + if (VT == MVT::v2i64 || VT == MVT::v4i64) { + if (!Subtarget->hasSSSE3orAVX() || + (VT == MVT::v4i64 && !Subtarget->hasAVX2())) + return SDValue(); - // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. - if (Mask.getOpcode() != ISD::BITCAST || - X.getOpcode() != ISD::BITCAST || - Y.getOpcode() != ISD::BITCAST) - return SDValue(); + // Canonicalize pandn to RHS + if (N0.getOpcode() == X86ISD::ANDNP) + std::swap(N0, N1); + // or (and (m, x), (pandn m, y)) + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) { + SDValue Mask = N1.getOperand(0); + SDValue X = N1.getOperand(1); + SDValue Y; + if (N0.getOperand(0) == Mask) + Y = N0.getOperand(1); + if (N0.getOperand(1) == Mask) + Y = N0.getOperand(0); + + // Check to see if the mask appeared in both the AND and ANDNP and + if (!Y.getNode()) + return SDValue(); - // Look through mask bitcast. - Mask = Mask.getOperand(0); - EVT MaskVT = Mask.getValueType(); + // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them. + if (Mask.getOpcode() != ISD::BITCAST || + X.getOpcode() != ISD::BITCAST || + Y.getOpcode() != ISD::BITCAST) + return SDValue(); - // Validate that the Mask operand is a vector sra node. The sra node - // will be an intrinsic. - if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return SDValue(); + // Look through mask bitcast. 
+ Mask = Mask.getOperand(0); + EVT MaskVT = Mask.getValueType(); - // FIXME: what to do for bytes, since there is a psignb/pblendvb, but - // there is no psrai.b - switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { - case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - break; - default: return SDValue(); - } + // Validate that the Mask operand is a vector sra node. The sra node + // will be an intrinsic. + if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN) + return SDValue(); - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(2); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); - unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); - if ((SraAmt + 1) != EltBits) - return SDValue(); + // FIXME: what to do for bytes, since there is a psignb/pblendvb, but + // there is no psrai.b + switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) { + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + break; + default: return SDValue(); + } - DebugLoc DL = N->getDebugLoc(); - - // Now we know we at least have a plendvb with the mask val. See if - // we can form a psignb/w/d. - // psign = x.type == y.type == mask.type && y = sub(0, x); - X = X.getOperand(0); - Y = Y.getOperand(0); - if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && - ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && - X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){ - unsigned Opc = 0; - switch (EltBits) { - case 8: Opc = X86ISD::PSIGNB; break; - case 16: Opc = X86ISD::PSIGNW; break; - case 32: Opc = X86ISD::PSIGND; break; - default: break; - } - if (Opc) { - SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1)); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign); - } - } - // PBLENDVB only available on SSE 4.1 - if (!(Subtarget->hasSSE41() || Subtarget->hasAVX())) - return SDValue(); + // Check that the SRA is all signbits. + SDValue SraC = Mask.getOperand(2); + unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + if ((SraAmt + 1) != EltBits) + return SDValue(); - X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X); - Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y); - Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask); - Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y); - return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask); + DebugLoc DL = N->getDebugLoc(); + + // Now we know we at least have a plendvb with the mask val. See if + // we can form a psignb/w/d. + // psign = x.type == y.type == mask.type && y = sub(0, x); + X = X.getOperand(0); + Y = Y.getOperand(0); + if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X && + ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) && + X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() && + (EltBits == 8 || EltBits == 16 || EltBits == 32)) { + SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, + Mask.getOperand(1)); + return DAG.getNode(ISD::BITCAST, DL, VT, Sign); } + // PBLENDVB only available on SSE 4.1 + if (!Subtarget->hasSSE41orAVX()) + return SDValue(); + + EVT BlendVT = (VT == MVT::v4i64) ? 
MVT::v32i8 : MVT::v16i8; + + X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X); + Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y); + Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask); + Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, X, Y); + return DAG.getNode(ISD::BITCAST, DL, VT, Mask); } } + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) std::swap(N0, N1); @@ -14409,8 +14565,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal adds from adds of shuffles. - if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && isHorizontalBinOp(LHS, RHS, true)) return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14424,8 +14579,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(1); // Try to synthesize horizontal subs from subs of shuffles. - if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) && - (VT == MVT::v4f32 || VT == MVT::v2f64) && + if (Subtarget->hasSSE3orAVX() && (VT == MVT::v4f32 || VT == MVT::v2f64) && isHorizontalBinOp(LHS, RHS, false)) return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS); return SDValue(); @@ -14621,7 +14775,23 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, OtherVal.getValueType()), NewCmp); } -static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) { +/// PerformAddCombine - Do target-specific dag combines on integer adds. +static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + // Try to synthesize horizontal adds from adds of shuffles. + if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && + isHorizontalBinOp(Op0, Op1, true)) + return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1); + + return OptimizeConditionalInDecrement(N, DAG); +} + +static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -14643,6 +14813,12 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) { } } + // Try to synthesize horizontal subs from subs of shuffles.
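The PerformOrCombine restructuring above is the heart of the PSIGN change: an (m & y) | (~m & x) pattern whose mask m comes from an all-sign-bits psrai (shift amount EltBits - 1) is a per-lane select, and when y == -x it is exactly the new unified X86ISD::PSIGN node (one opcode now covering the old PSIGNB/W/D); otherwise it becomes a VSELECT, using v32i8 lanes for the v4i64 case when AVX2 is available. A scalar model of the 16-bit lane, with illustrative names (note that hardware PSIGN additionally zeroes a lane whose second operand is zero):

  #include <cstdint>

  // m = s >> 15 is all ones exactly when s < 0 (the psraw-by-15 mask).
  int16_t blend_by_sign(int16_t s, int16_t x, int16_t y) {
    uint16_t m = uint16_t(s >> 15);
    return int16_t((m & uint16_t(y)) | (uint16_t(~m) & uint16_t(x)));
  }

  // With y == -x the blend collapses to a psign-style copy-sign.
  int16_t psign_like(int16_t x, int16_t s) {
    return blend_by_sign(s, x, int16_t(-x));
  }

The hunk below then completes PerformSubCombine with the matching X86ISD::HSUB synthesis that the comment above announces.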
+ EVT VT = N->getValueType(0); + if ((Subtarget->hasSSSE3orAVX()) && (VT == MVT::v8i16 || VT == MVT::v4i32) && + isHorizontalBinOp(Op0, Op1, false)) + return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1); + return OptimizeConditionalInDecrement(N, DAG); } @@ -14656,8 +14832,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VSELECT: case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG); - case ISD::SUB: return PerformSubCombine(N, DAG); + case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget); + case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget); case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: @@ -14687,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 3b7a14d..ccff3a5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -172,12 +172,18 @@ namespace llvm { /// ANDNP - Bitwise Logical AND NOT of Packed FP values. ANDNP, - /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, + /// PSIGN - Copy integer sign. + PSIGN, /// BLEND family of opcodes BLENDV, + /// HADD - Integer horizontal add. + HADD, + + /// HSUB - Integer horizontal sub. + HSUB, + /// FHADD - Floating point horizontal add. FHADD, @@ -269,12 +275,8 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPSY, - VUNPCKLPDY, UNPCKHPS, UNPCKHPD, - VUNPCKHPSY, - VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, @@ -408,11 +410,13 @@ namespace llvm { /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. - bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKLMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKH. - bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false); + bool isUNPCKHMask(ShuffleVectorSDNode *N, bool HasAVX2, + bool V2IsSplat = false); /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. 
vector_shuffle v, undef, diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 0245e5c..fa1d676 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" namespace llvm { diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6fd2efd..791bbe6 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -41,6 +41,8 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; +def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; +def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>; @@ -51,14 +53,8 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; -def X86psignb : SDNode<"X86ISD::PSIGNB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; -def X86psignw : SDNode<"X86ISD::PSIGNW", - SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; -def X86psignd : SDNode<"X86ISD::PSIGND", - SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>, +def X86psign : SDNode<"X86ISD::PSIGN", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; @@ -136,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; -def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; -def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; -def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; -def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; @@ -427,12 +419,12 @@ def movl : PatFrag<(ops node:$lhs, node:$rhs), def unpckl : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def unpckh : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N), Subtarget->hasAVX2()); }]>; def pshufd : PatFrag<(ops node:$lhs, node:$rhs), diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 9428fff..24c4a53 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -25,7 +25,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/PseudoSourceValue.h" 
#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -2903,6 +2902,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, switch (LoadMI->getOpcode()) { case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: + case X86::AVX2_SETALLONES: Alignment = 32; break; case X86::V_SET0: @@ -2948,6 +2948,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, case X86::AVX_SET0PSY: case X86::AVX_SET0PDY: case X86::AVX_SETALLONES: + case X86::AVX2_SETALLONES: case X86::FsFLD0SD: case X86::FsFLD0SS: case X86::VFsFLD0SD: @@ -2986,7 +2987,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES); + bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES || + Opc == X86::AVX2_SETALLONES); const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3555,7 +3557,11 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr }, { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, - { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, + { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr } +}; + +static const unsigned ReplaceableInstrsAVX2[][3] = { + //PackedSingle PackedDouble PackedInt { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm }, @@ -3563,7 +3569,7 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, - { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, + { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr } }; // FIXME: Some shuffle and unpack instructions have equivalents in different @@ -3576,11 +3582,23 @@ static const unsigned *lookup(unsigned opcode, unsigned domain) { return 0; } +static const unsigned *lookupAVX2(unsigned opcode, unsigned domain) { + for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) + if (ReplaceableInstrsAVX2[i][domain-1] == opcode) + return ReplaceableInstrsAVX2[i]; + return 0; +} + std::pair<uint16_t, uint16_t> X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; - return std::make_pair(domain, - domain && lookup(MI->getOpcode(), domain) ? 0xe : 0); + bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2(); + uint16_t validDomains = 0; + if (domain && lookup(MI->getOpcode(), domain)) + validDomains = 0xe; + else if (domain && lookupAVX2(MI->getOpcode(), domain)) + validDomains = hasAVX2 ? 
0xe : 0x6; + return std::make_pair(domain, validDomains); } void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { @@ -3588,6 +3606,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; assert(dom && "Not an SSE instruction"); const unsigned *table = lookup(MI->getOpcode(), dom); + if (!table) { // try the other table + assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && + "256-bit vector operations only available in AVX2"); + table = lookupAVX2(MI->getOpcode(), dom); + } assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 79ce509..35631d5 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1523,10 +1523,11 @@ def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; def : MnemonicAlias<"cbw", "cbtw">; +def : MnemonicAlias<"cwde", "cwtl">; def : MnemonicAlias<"cwd", "cwtd">; def : MnemonicAlias<"cdq", "cltd">; -def : MnemonicAlias<"cwde", "cwtl">; def : MnemonicAlias<"cdqe", "cltq">; +def : MnemonicAlias<"cqo", "cqto">; // lret maps to lretl, it is not ambiguous with lretq. def : MnemonicAlias<"lret", "lretl">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6deee4f..7cadac1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -311,13 +311,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // JIT implementation, it does not expand the instructions below like // X86MCInstLower does. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in { def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in + let Predicates = [HasAVX] in def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V; + let Predicates = [HasAVX2] in + def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "", + [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V; +} //===----------------------------------------------------------------------===// @@ -522,6 +525,8 @@ let Predicates = [HasSSE2] in { // fold opportunity reappears. 
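For the execution-domain hunks above: getExecutionDomain now returns the instruction's current domain together with a validDomains bitmask, where bit k set means an equivalent opcode exists in domain k (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt, matching the three table columns). Entries from ReplaceableInstrsAVX2 report 0xe only when AVX2 supplies the 256-bit integer forms (VPANDY and friends); without AVX2 they are pinned to the two floating-point domains, 0x6. A small sketch of that bitmask convention (my reading of the code, not LLVM source):

  enum Domain { PackedSingle = 1, PackedDouble = 2, PackedInt = 3 };

  constexpr unsigned domainBit(Domain d) { return 1u << d; }

  // All three domains available: 0b1110 == 0xe. Without AVX2 the 256-bit
  // PackedInt column is illegal, leaving 0b0110 == 0x6.
  static_assert((domainBit(PackedSingle) | domainBit(PackedDouble) |
                 domainBit(PackedInt)) == 0xe, "all three domains");
  static_assert((domainBit(PackedSingle) | domainBit(PackedDouble)) == 0x6,
                "float domains only");

(The MOVSD/X86Movlpd patterns the comment above refers to continue directly below.)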
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), @@ -2467,21 +2472,21 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), @@ -2493,21 +2498,21 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 
(X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the @@ -3421,47 +3426,6 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>; } - -/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. -/// -/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew -/// to collapse (bitconvert VT to VT) into its operand. -/// -multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0, bit Is2Addr = 1> { - let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !if(Is2Addr, - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; -} - -/// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64. -/// -/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew -/// to collapse (bitconvert VT to VT) into its operand. -/// -multiclass PDI_binop_rm_v4i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0> { - let isCommutable = IsCommutable in - def rr : PDI<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))]>; - def rm : PDI<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (OpNode VR256:$src1, (memopv4i64 addr:$src2)))]>; -} - } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic @@ -3473,7 +3437,8 @@ defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; -defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; +defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, @@ -3482,7 +3447,8 @@ defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; -defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; +defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; // Intrinsic forms defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, @@ -3527,21 +3493,23 @@ defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, let Predicates = [HasAVX2] in { defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, 
- i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; -defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V; + i256mem, 1, 0>, VEX_4V; +defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, - i256mem, 1, 0>, VEX_4V; + i256mem, 1, 0>, VEX_4V; defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, - i256mem, 0, 0>, VEX_4V; -defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V; + i256mem, 0, 0>, VEX_4V; +defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; // Intrinsic forms defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, @@ -3591,7 +3559,8 @@ defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, i128mem, 1>; defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, i128mem, 1>; -defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; +defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64, + i128mem, 1>; defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, i128mem, 1>; defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, @@ -3600,7 +3569,8 @@ defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, i128mem>; defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, i128mem>; -defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; +defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64, + i128mem>; // Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, @@ -3676,9 +3646,12 @@ defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", int_x86_sse2_psra_d, int_x86_sse2_psrai_d, VR128, 0>, VEX_4V; -defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; -defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; -defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V; +defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3735,9 +3708,12 @@ defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", int_x86_avx2_psra_d, int_x86_avx2_psrai_d, VR256, 0>, VEX_4V; -defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V; -defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V; -defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V; +defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3794,9 +3770,12 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d, 
int_x86_sse2_psrai_d, VR128>; -defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; -defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; -defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; +defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, + i128mem, 1>; +defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, + i128mem, 1>; +defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, + i128mem, 1>; let ExeDomain = SSEPackedInt in { let neverHasSideEffects = 1 in { @@ -3822,51 +3801,51 @@ let ExeDomain = SSEPackedInt in { let Predicates = [HasAVX] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), - (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>; + (VPSLLDQri VR128:$src1, imm:$src2)>; def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), - (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>; + (VPSRLDQri VR128:$src1, imm:$src2)>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; // Shift up / down and insert zero's. def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), - (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), - (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; } let Predicates = [HasAVX2] in { def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), - (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), - (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2), - (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>; + (VPSLLDQYri VR256:$src1, imm:$src2)>; def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2), - (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>; + (VPSRLDQYri VR256:$src1, imm:$src2)>; } let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), - (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), - (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2), - (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>; + (PSLLDQri VR128:$src1, imm:$src2)>; def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2), - (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>; + (PSRLDQri VR128:$src1, imm:$src2)>; def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), - (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; + (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; // Shift up / down and insert zero's. 
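A note on the psll_dq/psrl_dq pattern cleanups above and the X86vshl/X86vshr patterns that follow: the intrinsics carry a bit count while PSLLDQri/PSRLDQri take a byte count, which the BYTE_imm transform (an imm >> 3 SDNodeXForm defined in this file) converts between; the instruction itself shifts the whole 128-bit register by bytes, filling with zeros. A scalar model of pslldq (illustrative code, little-endian byte order assumed):

  #include <cstdint>
  #include <cstring>

  // Destination byte i + n takes source byte i; the vacated low bytes
  // (byte 0 is the least significant lane) become zero.
  void pslldq(uint8_t v[16], unsigned n) {
    uint8_t r[16] = {};
    if (n < 16)
      std::memcpy(r + n, v, 16 - n);
    std::memcpy(v, r, 16);
  }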
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))), - (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))), - (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; + (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; } //===---------------------------------------------------------------------===// @@ -3889,28 +3868,34 @@ let Predicates = [HasAVX] in { def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (VPCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (VPCMPEQBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), (VPCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (VPCMPEQWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), (VPCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (VPCMPEQDrm VR128:$src1, addr:$src2)>; def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), (VPCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (VPCMPGTBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), (VPCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (VPCMPGTWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), (VPCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (VPCMPGTDrm VR128:$src1, addr:$src2)>; } @@ -3930,28 +3915,34 @@ let Predicates = [HasAVX2] in { def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)), (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, (memop addr:$src2))), + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)))), (VPCMPEQBYrm VR256:$src1, addr:$src2)>; def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)), (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, (memop addr:$src2))), + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)))), (VPCMPEQWYrm VR256:$src1, addr:$src2)>; def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)), (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, (memop addr:$src2))), + def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)))), (VPCMPEQDYrm VR256:$src1, addr:$src2)>; def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)), (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, (memop addr:$src2))), + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)))), (VPCMPGTBYrm VR256:$src1, addr:$src2)>; def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)), (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, (memop addr:$src2))), + def : 
Pat<(v16i16 (X86pcmpgtw VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)))), (VPCMPGTWYrm VR256:$src1, addr:$src2)>; def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)), (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, (memop addr:$src2))), + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)))), (VPCMPGTDYrm VR256:$src1, addr:$src2)>; } @@ -3973,28 +3964,34 @@ let Constraints = "$src1 = $dst" in { let Predicates = [HasSSE2] in { def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (PCMPEQBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (PCMPEQBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)), (PCMPEQWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (PCMPEQWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)), (PCMPEQDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (PCMPEQDrm VR128:$src1, addr:$src2)>; def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)), (PCMPGTBrr VR128:$src1, VR128:$src2)>; - def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))), + def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, + (bc_v16i8 (memopv2i64 addr:$src2)))), (PCMPGTBrm VR128:$src1, addr:$src2)>; def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)), (PCMPGTWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))), + def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, + (bc_v8i16 (memopv2i64 addr:$src2)))), (PCMPGTWrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), (PCMPGTDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), + def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2)))), (PCMPGTDrm VR128:$src1, addr:$src2)>; } @@ -4207,19 +4204,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64, 0>, VEX_4V; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, bc_v16i8, 0>, VEX_4V; @@ -4227,19 +4213,8 @@ let Predicates = [HasAVX] in { bc_v8i16, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, bc_v4i32, 0>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - VR128:$src2)))]>, VEX_4V; - def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { @@ -4249,19 +4224,8 @@ let Predicates = [HasAVX2] in { bc_v16i16>, VEX_4V; defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, + bc_v4i64>, VEX_4V; defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, bc_v32i8>, VEX_4V; @@ -4269,57 +4233,28 @@ let Predicates = [HasAVX2] in { bc_v16i16>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, bc_v8i32>, VEX_4V; - - /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, - (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - VR256:$src2)))]>, VEX_4V; - def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, - (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), - "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, - (memopv4i64 addr:$src2))))]>, VEX_4V; + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, + bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. 
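These hunks (and the 256-bit and SSE2 blocks below) replace the hand-written PUNPCK*QDQ instruction definitions with sse2_unpack/sse2_unpack_y instantiations that take a bc_v2i64/bc_v4i64 bitconvert fragment, working around the tblgen limitation the FIXME describes instead of duplicating each def. For reference, the operation being defined, as a scalar model (illustrative C++, not TableGen):

  #include <cstdint>

  // punpcklqdq interleaves the low 64-bit halves of its two sources;
  // punpckhqdq does the same with the high halves.
  void punpcklqdq(uint64_t dst[2], const uint64_t a[2], const uint64_t b[2]) {
    dst[0] = a[0];
    dst[1] = b[0];
  }

  void punpckhqdq(uint64_t dst[2], const uint64_t a[2], const uint64_t b[2]) {
    dst[0] = a[1];
    dst[1] = b[1];
  }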
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpcklqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; - - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>; - - /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen - /// knew to collapse (bitconvert VT to VT) into its operand. - def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>; - def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhqdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (X86Punpckhqdq VR128:$src1, - (memopv2i64 addr:$src2))))]>; + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, + bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, + bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, + bc_v4i32>; + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, + bc_v2i64>; + + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, + bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, + bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, + bc_v4i32>; + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, + bc_v2i64>; } } // ExeDomain = SSEPackedInt @@ -5052,21 +4987,25 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>; } -let Predicates = [HasAVX], - ExeDomain = SSEPackedDouble in { - defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, - f128mem, 0>, TB, XD, VEX_4V; - defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, - f128mem, 0>, TB, OpSize, VEX_4V; - defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, - f256mem, 0>, TB, XD, VEX_4V; - defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, - f256mem, 0>, TB, OpSize, VEX_4V; -} -let Constraints = "$src1 = $dst", Predicates = [HasSSE3], - ExeDomain = SSEPackedDouble in { +let Predicates = [HasAVX] in { + let ExeDomain = SSEPackedSingle in { + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128, + f128mem, 0>, TB, XD, VEX_4V; + defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256, + f256mem, 0>, TB, XD, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128, + f128mem, 0>, TB, OpSize, VEX_4V; + defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256, + f256mem, 0>, TB, OpSize, VEX_4V; + } +} +let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in { + let ExeDomain = SSEPackedSingle in defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128, f128mem>, TB, XD; + let ExeDomain = SSEPackedDouble in defm ADDSUBPD : 
sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128, f128mem>, TB, OpSize; } @@ -5106,29 +5045,37 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, } let Predicates = [HasAVX] in { - defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, - X86fhadd, 0>, VEX_4V; - defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, - X86fhadd, 0>, VEX_4V; - defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, - X86fhsub, 0>, VEX_4V; - defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, - X86fhsub, 0>, VEX_4V; - defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, - X86fhadd, 0>, VEX_4V; - defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, - X86fhadd, 0>, VEX_4V; - defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, - X86fhsub, 0>, VEX_4V; - defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, - X86fhsub, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in { + defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, + X86fhsub, 0>, VEX_4V; + defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, + X86fhsub, 0>, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, + X86fhsub, 0>, VEX_4V; + defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, + X86fhadd, 0>, VEX_4V; + defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, + X86fhsub, 0>, VEX_4V; + } } let Constraints = "$src1 = $dst" in { - defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; - defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; - defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; - defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; + let ExeDomain = SSEPackedSingle in { + defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; + defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; + } + let ExeDomain = SSEPackedDouble in { + defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; + defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; + } } //===---------------------------------------------------------------------===// @@ -5284,11 +5231,11 @@ let isCommutable = 0 in { int_x86_avx2_pmadd_ub_sw>, VEX_4V; defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, int_x86_avx2_pshuf_b>, VEX_4V; - defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8, + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8, int_x86_avx2_psign_b>, VEX_4V; - defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16, + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16, int_x86_avx2_psign_w>, VEX_4V; - defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32, + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32, int_x86_avx2_psign_d>, VEX_4V; } defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, @@ -5331,12 +5278,21 @@ let Predicates = [HasSSSE3] in { def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (PSHUFBrm128 VR128:$src, addr:$mask)>; - def : Pat<(X86psignb VR128:$src1, VR128:$src2), + def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), 
(PSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignw VR128:$src1, VR128:$src2), + def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), (PSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignd VR128:$src1, VR128:$src2), + def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (PSIGNDrr128 VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), + (PHADDDrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), + (PHSUBDrr128 VR128:$src1, VR128:$src2)>; } let Predicates = [HasAVX] in { @@ -5345,12 +5301,39 @@ let Predicates = [HasAVX] in { def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), (VPSHUFBrm128 VR128:$src, addr:$mask)>; - def : Pat<(X86psignb VR128:$src1, VR128:$src2), + def : Pat<(v16i8 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNBrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignw VR128:$src1, VR128:$src2), + def : Pat<(v8i16 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNWrr128 VR128:$src1, VR128:$src2)>; - def : Pat<(X86psignd VR128:$src1, VR128:$src2), + def : Pat<(v4i32 (X86psign VR128:$src1, VR128:$src2)), (VPSIGNDrr128 VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i16 (X86hadd VR128:$src1, VR128:$src2)), + (VPHADDWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hadd VR128:$src1, VR128:$src2)), + (VPHADDDrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v8i16 (X86hsub VR128:$src1, VR128:$src2)), + (VPHSUBWrr128 VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86hsub VR128:$src1, VR128:$src2)), + (VPHSUBDrr128 VR128:$src1, VR128:$src2)>; +} + +let Predicates = [HasAVX2] in { + def : Pat<(v32i8 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNBrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86psign VR256:$src1, VR256:$src2)), + (VPSIGNDrr256 VR256:$src1, VR256:$src2)>; + + def : Pat<(v16i16 (X86hadd VR256:$src1, VR256:$src2)), + (VPHADDWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86hadd VR256:$src1, VR256:$src2)), + (VPHADDDrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86hsub VR256:$src1, VR256:$src2)), + (VPHSUBWrr256 VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86hsub VR256:$src1, VR256:$src2)), + (VPHSUBDrr256 VR256:$src1, VR256:$src2)>; } //===---------------------------------------------------------------------===// @@ -5837,14 +5820,16 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let Predicates = [HasAVX] in { - defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; - def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, OpSize, VEX; +let ExeDomain = SSEPackedSingle in { + let Predicates = [HasAVX] in { + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; + def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, OpSize, VEX; + } + defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; } -defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. 
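// (Editor's note: the f32 form arises when the extracted lane is stored
// through a float pointer, so the DAG contains
//   (store (f32 (bitconvert (extractelt (bc_v4i32 ...), imm))) addr)
// rather than an i32 store; the extra pattern below keeps such stores on
// the EXTRACTPS store form.)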
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), @@ -5965,10 +5950,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, OpSize; } -let Constraints = "$src1 = $dst" in - defm INSERTPS : SS41I_insertf32<0x21, "insertps">; -let Predicates = [HasAVX] in - defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; +let ExeDomain = SSEPackedSingle in { + let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; + let Predicates = [HasAVX] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; +} def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>, @@ -5985,6 +5972,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, PatFrag mem_frag32, PatFrag mem_frag64, Intrinsic V4F32Int, Intrinsic V2F64Int> { +let ExeDomain = SSEPackedSingle in { // Intrinsic operation, reg. // Vector intrinsic operation, reg def PSr : SS4AIi8<opcps, MRMSrcReg, @@ -5995,15 +5983,16 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, OpSize; // Vector intrinsic operation, mem - def PSm : Ii8<opcps, MRMSrcMem, + def PSm : SS4AIi8<opcps, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>, - TA, OpSize, - Requires<[HasSSE41]>; + OpSize; +} // ExeDomain = SSEPackedSingle +let ExeDomain = SSEPackedDouble in { // Vector intrinsic operation, reg def PDr : SS4AIi8<opcpd, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), @@ -6020,44 +6009,14 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr, [(set RC:$dst, (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>, OpSize; -} - -multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd, - RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_AVX : SS4AIi8<opcps, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; - - // Vector intrinsic operation, mem - def PSm_AVX : Ii8<opcps, MRMSrcMem, - (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TA, OpSize, Requires<[HasSSE41]>; - - // Vector intrinsic operation, reg - def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; - - // Vector intrinsic operation, mem - def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem, - (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, OpSize; +} // ExeDomain = SSEPackedDouble } multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, string OpcodeStr, Intrinsic F32Int, Intrinsic F64Int, bit Is2Addr = 1> { +let ExeDomain = GenericDomain in { // Intrinsic operation, reg. 
def SSr : SS4AIi8<opcss, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), @@ -6103,37 +6062,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, [(set VR128:$dst, (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, OpSize; -} - -multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd, - string OpcodeStr> { - // Intrinsic operation, reg. - def SSr_AVX : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, mem. - def SSm_AVX : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, reg. - def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; - - // Intrinsic operation, mem. - def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; +} // ExeDomain = GenericDomain } // FP round - roundss, roundps, roundsd, roundpd @@ -6150,13 +6079,6 @@ let Predicates = [HasAVX] in { defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; - - // Instructions for the assembler - defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">, - VEX; - defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">, - VEX; - defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6194,11 +6116,11 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", + "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, OpSize; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "ptest \t{$src2, $src1|$src1, $src2}", + "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>, OpSize; } @@ -6216,11 +6138,15 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, } let Defs = [EFLAGS], Predicates = [HasAVX] in { +let ExeDomain = SSEPackedSingle in { defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; +} +let ExeDomain = SSEPackedDouble in { defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>; } +} //===----------------------------------------------------------------------===// // SSE4.1 - Misc Instructions @@ -6391,10 +6317,12 @@ let Constraints = "$src1 = $dst" in { defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; } -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, 
addr:$src2)>; +let Predicates = [HasSSE41] in { + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; +} /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -6470,23 +6398,30 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, let Predicates = [HasAVX] in { let isCommutable = 0 in { - defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, - VR128, memopv16i8, i128mem, 0>, VEX_4V; - defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, - VR128, memopv16i8, i128mem, 0>, VEX_4V; - defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", - int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; - defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", - int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in { + defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", + int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + } + let ExeDomain = SSEPackedDouble in { + defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + VR128, memopv16i8, i128mem, 0>, VEX_4V; + defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", + int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; + } defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, VR128, memopv16i8, i128mem, 0>, VEX_4V; defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, VR128, memopv16i8, i128mem, 0>, VEX_4V; } + let ExeDomain = SSEPackedSingle in defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, VR128, memopv16i8, i128mem, 0>, VEX_4V; + let ExeDomain = SSEPackedDouble in defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, VR128, memopv16i8, i128mem, 0>, VEX_4V; + let ExeDomain = SSEPackedSingle in defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V; } @@ -6502,8 +6437,10 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { + let ExeDomain = SSEPackedSingle in defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, VR128, memopv16i8, i128mem>; + let ExeDomain = SSEPackedDouble in defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, VR128, memopv16i8, i128mem>; defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, @@ -6511,8 +6448,10 @@ let Constraints = "$src1 = $dst" in { defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, VR128, memopv16i8, i128mem>; } + let ExeDomain = SSEPackedSingle in defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, VR128, memopv16i8, i128mem>; + let ExeDomain = SSEPackedDouble in defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, VR128, memopv16i8, i128mem>; } @@ -6539,16 +6478,20 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, } let Predicates = [HasAVX] in { +let ExeDomain = SSEPackedDouble in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, memopv16i8, int_x86_sse41_blendvpd>; -defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, - memopv16i8, 
int_x86_sse41_blendvps>; -defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, - memopv16i8, int_x86_sse41_pblendvb>; defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, memopv32i8, int_x86_avx_blendv_pd_256>; +} // ExeDomain = SSEPackedDouble +let ExeDomain = SSEPackedSingle in { +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, + memopv16i8, int_x86_sse41_blendvps>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; +} // ExeDomain = SSEPackedSingle +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, + memopv16i8, int_x86_sse41_pblendvb>; } let Predicates = [HasAVX2] in { @@ -6612,7 +6555,9 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in { } } +let ExeDomain = SSEPackedDouble in defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +let ExeDomain = SSEPackedSingle in defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; @@ -6712,10 +6657,12 @@ let Predicates = [HasAVX2] in { let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; -def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), - (PCMPGTQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), - (PCMPGTQrm VR128:$src1, addr:$src2)>; +let Predicates = [HasSSE42] in { + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), + (PCMPGTQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), + (PCMPGTQrm VR128:$src1, addr:$src2)>; +} //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions @@ -7164,21 +7111,27 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (Int VR128:$src))]>, VEX; -def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, - int_x86_avx_vbroadcast_ss>; -def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcast_ss_256>; +let ExeDomain = SSEPackedSingle in { + def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcast_ss>; + def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcast_ss_256>; +} +let ExeDomain = SSEPackedDouble in def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, int_x86_avx_vbroadcast_sd_256>; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, - int_x86_avx2_vbroadcast_ss_ps>; -def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, - int_x86_avx2_vbroadcast_ss_ps_256>; +let ExeDomain = SSEPackedSingle in { + def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, + int_x86_avx2_vbroadcast_ss_ps>; + def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, + int_x86_avx2_vbroadcast_ss_ps_256>; +} +let ExeDomain = SSEPackedDouble in def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256>; + int_x86_avx2_vbroadcast_sd_pd_256>; let Predicates = [HasAVX2] in def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, @@ -7187,19 
+7140,6 @@ def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; -def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; -def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDrm addr:$src)>; - -def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; -def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values @@ -7300,8 +7240,7 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), // multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, - Intrinsic IntSt, Intrinsic IntSt256, - PatFrag pf128, PatFrag pf256> { + Intrinsic IntSt, Intrinsic IntSt256> { def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7322,18 +7261,18 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; } +let ExeDomain = SSEPackedSingle in defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", int_x86_avx_maskload_ps, int_x86_avx_maskload_ps_256, int_x86_avx_maskstore_ps, - int_x86_avx_maskstore_ps_256, - memopv4f32, memopv8f32>; + int_x86_avx_maskstore_ps_256>; +let ExeDomain = SSEPackedDouble in defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskload_pd, int_x86_avx_maskload_pd_256, int_x86_avx_maskstore_pd, - int_x86_avx_maskstore_pd_256, - memopv2f64, memopv4f64>; + int_x86_avx_maskstore_pd_256>; //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values @@ -7361,22 +7300,26 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX; } -defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, - memopv4f32, memopv4i32, - int_x86_avx_vpermilvar_ps, - int_x86_avx_vpermil_ps>; -defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, - memopv8f32, memopv8i32, - int_x86_avx_vpermilvar_ps_256, - int_x86_avx_vpermil_ps_256>; -defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, - memopv2f64, memopv2i64, - int_x86_avx_vpermilvar_pd, - int_x86_avx_vpermil_pd>; -defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, - memopv4f64, memopv4i64, - int_x86_avx_vpermilvar_pd_256, - int_x86_avx_vpermil_pd_256>; +let ExeDomain = SSEPackedSingle in { + defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem, + memopv4f32, memopv4i32, + int_x86_avx_vpermilvar_ps, + int_x86_avx_vpermil_ps>; + defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem, + memopv8f32, memopv8i32, + int_x86_avx_vpermilvar_ps_256, + int_x86_avx_vpermil_ps_256>; +} +let ExeDomain = SSEPackedDouble in { + defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem, + memopv2f64, memopv2i64, + int_x86_avx_vpermilvar_pd, + int_x86_avx_vpermil_pd>; + defm VPERMILPDY 
: avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem, + memopv4f64, memopv4i64, + int_x86_avx_vpermilvar_pd_256, + int_x86_avx_vpermil_pd_256>; +} def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))), (VPERMILPSYri VR256:$src1, imm:$imm)>; @@ -7549,6 +7492,40 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, int_x86_avx2_pbroadcastq_128, int_x86_avx2_pbroadcastq_256>; +let Predicates = [HasAVX2] in { + def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), + (VPBROADCASTBrm addr:$src)>; + def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), + (VPBROADCASTBYrm addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), + (VPBROADCASTWYrm addr:$src)>; + def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDrm addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VPBROADCASTDYrm addr:$src)>; + def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), + (VPBROADCASTQrm addr:$src)>; + def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VPBROADCASTQYrm addr:$src)>; +} + +// AVX1 broadcast patterns +def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSYrm addr:$src)>; +def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), + (VBROADCASTSDrm addr:$src)>; +def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSYrm addr:$src)>; +def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), + (VBROADCASTSDrm addr:$src)>; + +def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; +def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), + (VBROADCASTSSrm addr:$src)>; + //===----------------------------------------------------------------------===// // VPERM - Permute instructions // @@ -7569,6 +7546,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, } defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>; +let ExeDomain = SSEPackedSingle in defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, @@ -7588,6 +7566,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>, VEX_W; +let ExeDomain = SSEPackedDouble in defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, VEX_W; @@ -7643,8 +7622,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), // multiclass avx2_pmovmask<string OpcodeStr, Intrinsic IntLd128, Intrinsic IntLd256, - Intrinsic IntSt128, Intrinsic IntSt256, - PatFrag pf128, PatFrag pf256> { + Intrinsic IntSt128, Intrinsic IntSt256> { def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -7667,124 +7645,49 @@ defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", int_x86_avx2_maskload_d, int_x86_avx2_maskload_d_256, int_x86_avx2_maskstore_d, - int_x86_avx2_maskstore_d_256, - memopv4i32, memopv8i32>; + int_x86_avx2_maskstore_d_256>; defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskload_q, int_x86_avx2_maskload_q_256, int_x86_avx2_maskstore_q, - int_x86_avx2_maskstore_q_256, - memopv2i64, memopv4i64>, VEX_W; + int_x86_avx2_maskstore_q_256>, VEX_W; //===----------------------------------------------------------------------===// // Variable Bit Shifts // -multiclass avx2_var_shift<bits<8> opc, string 
OpcodeStr, - Intrinsic Int128, Intrinsic Int256> { +multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType vt128, ValueType vt256> { def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V; - def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>, - VEX_4V; - def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V; - def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, i256mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>, + (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, VEX_4V; -} - -multiclass avx2_var_shift_i64<bits<8> opc, string OpcodeStr, - Intrinsic Int128, Intrinsic Int256> { - def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2))]>, VEX_4V; def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, - (Int128 VR128:$src1, (memopv2i64 addr:$src2)))]>, + (vt128 (OpNode VR128:$src1, + (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>, VEX_4V; def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2))]>, VEX_4V; + [(set VR256:$dst, + (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, + VEX_4V; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, - (Int256 VR256:$src1, (memopv4i64 addr:$src2)))]>, + (vt256 (OpNode VR256:$src1, + (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>, VEX_4V; } -defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d, - int_x86_avx2_psllv_d_256>; -defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q, - int_x86_avx2_psllv_q_256>, VEX_W; -defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d, - int_x86_avx2_psrlv_d_256>; -defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q, - int_x86_avx2_psrlv_q_256>, VEX_W; -defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d, - int_x86_avx2_psrav_d_256>; - -let Predicates = [HasAVX2] in { - def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSLLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSLLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSRLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i32 (shl 
(v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSLLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VPSLLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VPSRLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRAVDYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i32 (shl (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSLLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), - (VPSLLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSRLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), - (VPSRLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1), - (v4i32 (bitconvert (memopv2i64 addr:$src2))))), - (VPSRAVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSLLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), - (VPSLLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSRLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), - (VPSRLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1), - (v8i32 (bitconvert (memopv4i64 addr:$src2))))), - (VPSRAVDYrm VR256:$src1, addr:$src2)>; -} +defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; +defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; +defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; +defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; +defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 328cf67..81ee665 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -376,6 +376,7 @@ ReSimplify: case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break; case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break; case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break; + case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break; case X86::MOV16r0: LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0 diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 763fb43..e93f8e9 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -190,6 +190,10 @@ public: bool hasAVX2() const { return HasAVX2; } bool hasXMM() const { return hasSSE1() || hasAVX(); } bool hasXMMInt() const { return hasSSE2() || hasAVX(); } + bool hasSSE3orAVX() const { return hasSSE3() || hasAVX(); } + bool hasSSSE3orAVX() const { return hasSSSE3() || hasAVX(); } + bool hasSSE41orAVX() const { return hasSSE41() || hasAVX(); } + bool hasSSE42orAVX() const { return hasSSE42() || hasAVX(); } bool hasAES() const { return HasAES; } bool hasCLMUL() const { return HasCLMUL; } bool hasFMA3() const { return HasFMA3; } diff --git a/lib/Target/X86/X86TargetMachine.cpp 
b/lib/Target/X86/X86TargetMachine.cpp index 4d4d7c0..1c9f3bd 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -31,8 +31,9 @@ extern "C" void LLVMInitializeX86Target() { X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, false), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-" "n8:16:32-S128" : @@ -51,8 +52,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : X86TargetMachine(T, TT, CPU, FS, RM, CM, true), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : X86TargetMachine(T, TT, CPU, FS, RM, CM, OL, true), DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), @@ -66,8 +68,9 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit), FrameLowering(*this, Subtarget), ELFWriterInfo(is64Bit, true) { @@ -102,16 +105,15 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, static cl::opt<bool> UseVZeroUpper("x86-use-vzeroupper", cl::desc("Minimize AVX to SSE transition penalty"), - cl::init(false)); + cl::init(true)); //===----------------------------------------------------------------------===// // Pass Pipeline Configuration //===----------------------------------------------------------------------===// -bool X86TargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addInstSelector(PassManagerBase &PM) { // Install an instruction selector. - PM.add(createX86ISelDag(*this, OptLevel)); + PM.add(createX86ISelDag(*this, getOptLevel())); // For 32-bit, prepend instructions to set the "global base reg" for PIC. if (!Subtarget.is64Bit()) @@ -120,33 +122,21 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, return false; } -bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM) { PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } -bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM) { PM.add(createX86FloatingPointStackifierPass()); return true; // -print-machineinstr should print after this. 
} -bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM) { bool ShouldPrint = false; - if (OptLevel != CodeGenOpt::None) { - if (Subtarget.hasXMMInt()) { - PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); - ShouldPrint = true; - } - if (Subtarget.hasAVX2()) { - // FIXME this should be turned on for just AVX, but the pass doesn't check - // that instructions are valid before replacing them and there are AVX2 - // integer instructions in the table. - PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass)); - ShouldPrint = true; - } + if (getOptLevel() != CodeGenOpt::None && Subtarget.hasXMMInt()) { + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + ShouldPrint = true; } if (Subtarget.hasAVX() && UseVZeroUpper) { @@ -158,7 +148,6 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, - CodeGenOpt::Level OptLevel, JITCodeEmitter &JCE) { PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index d1569aa..64be458 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -40,6 +40,7 @@ public: X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64Bit); virtual const X86InstrInfo *getInstrInfo() const { @@ -66,11 +67,11 @@ public: } // Set up the pass pipeline. - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + virtual bool addInstSelector(PassManagerBase &PM); + virtual bool addPreRegAlloc(PassManagerBase &PM); + virtual bool addPostRegAlloc(PassManagerBase &PM); + virtual bool addPreEmitPass(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); }; @@ -85,7 +86,8 @@ class X86_32TargetMachine : public X86TargetMachine { public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; @@ -112,7 +114,8 @@ class X86_64TargetMachine : public X86TargetMachine { public: X86_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const X86TargetLowering *getTargetLowering() const { return &TLInfo; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 276e841..7d5fcce 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -61,9 +61,10 @@ static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) { } static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model 
CM) { + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); + X->InitMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index fdc5d35..eec3674 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -21,8 +21,9 @@ using namespace llvm; /// XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM, OL), Subtarget(TT, CPU, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" "i16:16:32-i32:32:32-i64:32:32-n32"), @@ -32,8 +33,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, TSInfo(*this) { } -bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { +bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM) { PM.add(createXCoreISelDag(*this)); return false; } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index 83d09d6..3f2644d 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -34,7 +34,8 @@ class XCoreTargetMachine : public LLVMTargetMachine { public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameLowering *getFrameLowering() const { @@ -55,7 +56,7 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addInstSelector(PassManagerBase &PM); }; } // end namespace llvm diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 4bb6f7a..95aef27 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -74,7 +74,7 @@ namespace { std::string getDescription() const { return std::string((IsArg ? "Argument #" : "Return value #")) - + utostr(Idx) + " of function " + F->getNameStr(); + + utostr(Idx) + " of function " + F->getName().str(); } }; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c7b3ff8..e8136ab 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -760,7 +760,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // The size of ByVal arguments is derived from the type, so we // can't change to a type with a different size. If the size were // passed explicitly we could avoid this check. - if (!CS.paramHasAttr(ix, Attribute::ByVal)) + if (!CS.isByValArgument(ix)) return true; Type* SrcTy = @@ -960,7 +960,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { PointerType *PTy = cast<PointerType>(Callee->getType()); FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); if (FTy->isVarArg()) { - int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1); + int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 
2 : 0); // See if we can optimize any arguments passed through the varargs area of // the call. for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(), diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 0cc969b..a7a6311 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1902,7 +1902,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { MadeIRChange = false; DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " - << F.getNameStr() << "\n"); + << F.getName() << "\n"); { // Do a depth-first traversal of the function, populate the worklist with diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp new file mode 100644 index 0000000..b617539 --- /dev/null +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -0,0 +1,987 @@ +//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// Details of the algorithm: +// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asan" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Type.h" + +#include <string> +#include <algorithm> + +using namespace llvm; + +static const uint64_t kDefaultShadowScale = 3; +static const uint64_t kDefaultShadowOffset32 = 1ULL << 29; +static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; + +static const size_t kMaxStackMallocSize = 1 << 16; // 64K +static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3; +static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; + +static const char *kAsanModuleCtorName = "asan.module_ctor"; +static const char *kAsanReportErrorTemplate = "__asan_report_"; +static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; +static const char *kAsanInitName = "__asan_init"; +static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; +static const char *kAsanMappingScaleName = "__asan_mapping_scale"; +static const char *kAsanStackMallocName = "__asan_stack_malloc"; +static const char *kAsanStackFreeName = "__asan_stack_free"; + +static const int kAsanStackLeftRedzoneMagic = 0xf1; +static const int 
kAsanStackMidRedzoneMagic = 0xf2;
+static const int kAsanStackRightRedzoneMagic = 0xf3;
+static const int kAsanStackPartialRedzoneMagic = 0xf4;
+
+// Command-line flags.
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+       cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
+       cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack",
+       cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-use-after-return.
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+       cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+       cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClMemIntrin("asan-memintrin",
+       cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -fasan-blacklist.
+static cl::opt<std::string> ClBlackListFile("asan-blacklist",
+       cl::desc("File containing the list of functions to ignore "
+                "during instrumentation"), cl::Hidden);
+static cl::opt<bool> ClUseCall("asan-use-call",
+       cl::desc("Use function call to generate a crash"), cl::Hidden,
+       cl::init(true));
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+// Shadow = (Mem >> scale) + (1 << offset_log)
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+       cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
+       cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+static cl::opt<bool> ClOpt("asan-opt",
+       cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
+       cl::desc("Instrument the same temp just once"), cl::Hidden,
+       cl::init(true));
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+       cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+
+// Debug flags.
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+                            cl::init(0));
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+                                 cl::Hidden, cl::init(0));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func",
+                                        cl::Hidden, cl::desc("Debug func"));
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+                               cl::Hidden, cl::init(-1));
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
+                               cl::Hidden, cl::init(-1));
+
+namespace {
+
+// Blacklisted functions are not instrumented.
+// The blacklist file contains one or more lines like this:
+// ---
+// fun:FunctionWildCard
+// ---
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+class BlackList {
+ public:
+  BlackList(const std::string &Path);
+  bool isIn(const Function &F);
+ private:
+  Regex *Functions;
+};
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
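+/// (Editor's note: with the 64-bit defaults declared above -- scale 3 and
+/// offset 1 << 44 -- the shadow of an address A lives at
+/// (A >> 3) + 0x0000100000000000, i.e. one shadow byte covers each
+/// 8-byte granule of application memory.)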
+struct AddressSanitizer : public ModulePass { + AddressSanitizer(); + void instrumentMop(Instruction *I); + void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB, + Value *Addr, uint32_t TypeSize, bool IsWrite); + Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr, + bool IsWrite, uint32_t TypeSize); + bool instrumentMemIntrinsic(MemIntrinsic *MI); + void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr, + Value *Size, + Instruction *InsertBefore, bool IsWrite); + Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); + bool handleFunction(Module &M, Function &F); + bool poisonStackInFunction(Module &M, Function &F); + virtual bool runOnModule(Module &M); + bool insertGlobalRedzones(Module &M); + BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp); + static char ID; // Pass identification, replacement for typeid + + private: + + uint64_t getAllocaSizeInBytes(AllocaInst *AI) { + Type *Ty = AI->getAllocatedType(); + uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8; + return SizeInBytes; + } + uint64_t getAlignedSize(uint64_t SizeInBytes) { + return ((SizeInBytes + RedzoneSize - 1) + / RedzoneSize) * RedzoneSize; + } + uint64_t getAlignedAllocaSize(AllocaInst *AI) { + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + return getAlignedSize(SizeInBytes); + } + + void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, + Value *ShadowBase, bool DoPoison); + bool LooksLikeCodeInBug11395(Instruction *I); + + Module *CurrentModule; + LLVMContext *C; + TargetData *TD; + uint64_t MappingOffset; + int MappingScale; + size_t RedzoneSize; + int LongSize; + Type *IntptrTy; + Type *IntptrPtrTy; + Function *AsanCtorFunction; + Function *AsanInitFunction; + Instruction *CtorInsertBefore; + OwningPtr<BlackList> BL; +}; +} // namespace + +char AddressSanitizer::ID = 0; +INITIALIZE_PASS(AddressSanitizer, "asan", + "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", + false, false) +AddressSanitizer::AddressSanitizer() : ModulePass(ID) { } +ModulePass *llvm::createAddressSanitizerPass() { + return new AddressSanitizer(); +} + +// Create a constant for Str so that we can pass it to the run-time lib. +static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { + Constant *StrConst = ConstantArray::get(M.getContext(), Str); + return new GlobalVariable(M, StrConst->getType(), true, + GlobalValue::PrivateLinkage, StrConst, ""); +} + +// Split the basic block and insert an if-then code. +// Before: +// Head +// SplitBefore +// Tail +// After: +// Head +// if (Cmp) +// NewBasicBlock +// SplitBefore +// Tail +// +// Returns the NewBasicBlock's terminator. 
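+// (Editor's note: instrumentAddress below is the main caller: it emits the
+// shadow-byte compare, splits at the instruction after the compare, and
+// builds the slow-path checks and crash code at the returned terminator,
+// so that code executes only when the shadow byte is non-zero.)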
+BranchInst *AddressSanitizer::splitBlockAndInsertIfThen(
+    Instruction *SplitBefore, Value *Cmp) {
+  BasicBlock *Head = SplitBefore->getParent();
+  BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+  TerminatorInst *HeadOldTerm = Head->getTerminator();
+  BasicBlock *NewBasicBlock =
+      BasicBlock::Create(*C, "", Head->getParent());
+  BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock,
+                                               /*ifFalse*/Tail,
+                                               Cmp);
+  ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+  BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock);
+  return CheckTerm;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+  // Shadow >> scale
+  Shadow = IRB.CreateLShr(Shadow, MappingScale);
+  if (MappingOffset == 0)
+    return Shadow;
+  // (Shadow >> scale) | offset
+  return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
+                                               MappingOffset));
+}
+
+void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
+    Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
+  // Check the first byte.
+  {
+    IRBuilder<> IRB(InsertBefore);
+    instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
+  }
+  // Check the last byte.
+  {
+    IRBuilder<> IRB(InsertBefore);
+    Value *SizeMinusOne = IRB.CreateSub(
+        Size, ConstantInt::get(Size->getType(), 1));
+    SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
+    Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+    Value *AddrPlusSizeMinusOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
+    instrumentAddress(OrigIns, IRB, AddrPlusSizeMinusOne, 8, IsWrite);
+  }
+}
+
+// Instrument memset/memmove/memcpy
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  Value *Dst = MI->getDest();
+  MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
+  Value *Src = MemTran ? MemTran->getSource() : NULL;
+  Value *Length = MI->getLength();
+
+  Constant *ConstLength = dyn_cast<Constant>(Length);
+  Instruction *InsertBefore = MI;
+  if (ConstLength) {
+    if (ConstLength->isNullValue()) return false;
+  } else {
+    // The size is not a constant so it could be zero -- check at run-time.
+    IRBuilder<> IRB(InsertBefore);
+
+    Value *Cmp = IRB.CreateICmpNE(Length,
+                                  Constant::getNullValue(Length->getType()));
+    InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp);
+  }
+
+  instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
+  if (Src)
+    instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
+  return true;
+}
+
+static Value *getLDSTOperand(Instruction *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    return LI->getPointerOperand();
+  }
+  return cast<StoreInst>(*I).getPointerOperand();
+}
+
+void AddressSanitizer::instrumentMop(Instruction *I) {
+  int IsWrite = isa<StoreInst>(*I);
+  Value *Addr = getLDSTOperand(I);
+  if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
+    // We are accessing a global scalar variable. Nothing to catch here.
+    return;
+  }
+  Type *OrigPtrTy = Addr->getType();
+  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+
+  assert(OrigTy->isSized());
+  uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+
+  if (TypeSize != 8 && TypeSize != 16 &&
+      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+    // Ignore all unusual sizes.
+    return;
+  }
+
+  IRBuilder<> IRB(I);
+  instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+}
+
+Instruction *AddressSanitizer::generateCrashCode(
+    IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {
+
+  if (ClUseCall) {
+    // Here we use a call instead of arch-specific asm to report an error.
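+    // (Editor's note: with the type sizes admitted by instrumentMop, the
+    // callee resolves to one of __asan_report_{load,store}{1,2,4,8,16}.)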
+ // This is almost always slower (because the codegen needs to generate + // prologue/epilogue for otherwise leaf functions) and generates more code. + // This mode could be useful if we can not use SIGILL for some reason. + // + // IsWrite and TypeSize are encoded in the function name. + std::string FunctionName = std::string(kAsanReportErrorTemplate) + + (IsWrite ? "store" : "load") + itostr(TypeSize / 8); + Value *ReportWarningFunc = CurrentModule->getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IntptrTy, NULL); + CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr); + Call->setDoesNotReturn(); + return Call; + } + + uint32_t LogOfSizeInBytes = CountTrailingZeros_32(TypeSize / 8); + assert(8U * (1 << LogOfSizeInBytes) == TypeSize); + uint8_t TelltaleValue = IsWrite * 8 + LogOfSizeInBytes; + assert(TelltaleValue < 16); + + // Move the failing address to %rax/%eax + FunctionType *Fn1Ty = FunctionType::get( + IRB.getVoidTy(), ArrayRef<Type*>(IntptrTy), false); + const char *MovStr = LongSize == 32 + ? "mov $0, %eax" : "mov $0, %rax"; + Value *AsmMov = InlineAsm::get( + Fn1Ty, StringRef(MovStr), StringRef("r"), true); + IRB.CreateCall(AsmMov, Addr); + + // crash with ud2; could use int3, but it is less friendly to gdb. + // after ud2 put a 1-byte instruction that encodes the access type and size. + + const char *TelltaleInsns[16] = { + "push %eax", // 0x50 + "push %ecx", // 0x51 + "push %edx", // 0x52 + "push %ebx", // 0x53 + "push %esp", // 0x54 + "push %ebp", // 0x55 + "push %esi", // 0x56 + "push %edi", // 0x57 + "pop %eax", // 0x58 + "pop %ecx", // 0x59 + "pop %edx", // 0x5a + "pop %ebx", // 0x5b + "pop %esp", // 0x5c + "pop %ebp", // 0x5d + "pop %esi", // 0x5e + "pop %edi" // 0x5f + }; + + std::string AsmStr = "ud2;"; + AsmStr += TelltaleInsns[TelltaleValue]; + Value *MyAsm = InlineAsm::get(FunctionType::get(Type::getVoidTy(*C), false), + StringRef(AsmStr), StringRef(""), true); + CallInst *AsmCall = IRB.CreateCall(MyAsm); + + // This saves us one jump, but triggers a bug in RA (or somewhere else): + // while building 483.xalancbmk the compiler goes into infinite loop in + // llvm::SpillPlacement::iterate() / RAGreedy::growRegion + // AsmCall->setDoesNotReturn(); + return AsmCall; +} + +void AddressSanitizer::instrumentAddress(Instruction *OrigIns, + IRBuilder<> &IRB, Value *Addr, + uint32_t TypeSize, bool IsWrite) { + Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + + Type *ShadowTy = IntegerType::get( + *C, std::max(8U, TypeSize >> MappingScale)); + Type *ShadowPtrTy = PointerType::get(ShadowTy, 0); + Value *ShadowPtr = memToShadow(AddrLong, IRB); + Value *CmpVal = Constant::getNullValue(ShadowTy); + Value *ShadowValue = IRB.CreateLoad( + IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); + + Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); + + Instruction *CheckTerm = splitBlockAndInsertIfThen( + cast<Instruction>(Cmp)->getNextNode(), Cmp); + IRBuilder<> IRB2(CheckTerm); + + size_t Granularity = 1 << MappingScale; + if (TypeSize < 8 * Granularity) { + // Addr & (Granularity - 1) + Value *Lower3Bits = IRB2.CreateAnd( + AddrLong, ConstantInt::get(IntptrTy, Granularity - 1)); + // (Addr & (Granularity - 1)) + size - 1 + Value *LastAccessedByte = IRB2.CreateAdd( + Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); + // (uint8_t) ((Addr & (Granularity-1)) + size - 1) + LastAccessedByte = IRB2.CreateIntCast( + LastAccessedByte, IRB.getInt8Ty(), false); + // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue + Value *Cmp2 = 
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+                                         IRBuilder<> &IRB, Value *Addr,
+                                         uint32_t TypeSize, bool IsWrite) {
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+  Type *ShadowTy = IntegerType::get(
+      *C, std::max(8U, TypeSize >> MappingScale));
+  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+  Value *ShadowPtr = memToShadow(AddrLong, IRB);
+  Value *CmpVal = Constant::getNullValue(ShadowTy);
+  Value *ShadowValue = IRB.CreateLoad(
+      IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+  Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+
+  Instruction *CheckTerm = splitBlockAndInsertIfThen(
+      cast<Instruction>(Cmp)->getNextNode(), Cmp);
+  IRBuilder<> IRB2(CheckTerm);
+
+  size_t Granularity = 1 << MappingScale;
+  if (TypeSize < 8 * Granularity) {
+    // Addr & (Granularity - 1)
+    Value *Lower3Bits = IRB2.CreateAnd(
+        AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+    // (Addr & (Granularity - 1)) + size - 1
+    Value *LastAccessedByte = IRB2.CreateAdd(
+        Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+    // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+    LastAccessedByte = IRB2.CreateIntCast(
+        LastAccessedByte, IRB.getInt8Ty(), false);
+    // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+    Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
+
+    CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2);
+  }
+
+  IRBuilder<> IRB1(CheckTerm);
+  Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
+  Crash->setDebugLoc(OrigIns->getDebugLoc());
+}
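For reference, the check that instrumentAddress emits can be modeled in plain C++ roughly as follows. This is a sketch assuming the default 8-byte shadow granularity (MappingScale == 3); the function name is invented:

#include <cstddef>
#include <cstdint>

// Shadow value 0 means the whole 8-byte granule is addressable; a value k in
// 1..7 means only the first k bytes are. Negative values mark redzones, which
// the signed >= comparison below catches as well.
static bool accessIsPoisoned(uintptr_t Addr, size_t AccessSize,
                             int8_t ShadowValue) {
  if (ShadowValue == 0)
    return false;                          // fast path: fully addressable
  int64_t LastAccessedByte = (Addr & 7) + AccessSize - 1;
  return LastAccessedByte >= ShadowValue;  // the slow-path check above
}

// Example: a 2-byte access at an address with low bits 6 touches in-granule
// bytes 6 and 7; with ShadowValue == 7 (bytes 0..6 addressable) the check
// fires and the crash code runs.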
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+  SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+  for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
+       E = M.getGlobalList().end(); G != E; ++G) {
+    Type *Ty = cast<PointerType>(G->getType())->getElementType();
+    DEBUG(dbgs() << "GLOBAL: " << *G);
+
+    if (!Ty->isSized()) continue;
+    if (!G->hasInitializer()) continue;
+    // Touch only those globals that will not be defined in other modules.
+    // Don't handle ODR linkage types, since other modules may be built
+    // without ASan.
+    if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+        G->getLinkage() != GlobalVariable::PrivateLinkage &&
+        G->getLinkage() != GlobalVariable::InternalLinkage)
+      continue;
+    // Two problems with thread-locals:
+    // - The address of the main thread's copy can't be computed at link-time.
+    // - Need to poison all copies, not just the main thread's one.
+    if (G->isThreadLocal())
+      continue;
+    // For now, just ignore this global if the alignment is large.
+    if (G->getAlignment() > RedzoneSize) continue;
+
+    // Ignore all the globals with the names starting with "\01L_OBJC_".
+    // Many of those are put into the .cstring section. The linker compresses
+    // that section by removing the spare \0s after the string terminator, so
+    // our redzones get broken.
+    if ((G->getName().find("\01L_OBJC_") == 0) ||
+        (G->getName().find("\01l_OBJC_") == 0)) {
+      DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+      continue;
+    }
+
+    // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+    // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+    // them.
+    if (G->hasSection()) {
+      StringRef Section(G->getSection());
+      if ((Section.find("__OBJC,") == 0) ||
+          (Section.find("__DATA, __objc_") == 0)) {
+        DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+        continue;
+      }
+    }
+
+    GlobalsToChange.push_back(G);
+  }
+
+  size_t n = GlobalsToChange.size();
+  if (n == 0) return false;
+
+  // A global is described by a structure:
+  //   size_t beg;
+  //   size_t size;
+  //   size_t size_with_redzone;
+  //   const char *name;
+  // We initialize an array of such structures and pass it to a run-time call.
+  StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
+                                               IntptrTy, IntptrTy, NULL);
+  SmallVector<Constant *, 16> Initializers(n);
+
+  IRBuilder<> IRB(CtorInsertBefore);
+
+  for (size_t i = 0; i < n; i++) {
+    GlobalVariable *G = GlobalsToChange[i];
+    PointerType *PtrTy = cast<PointerType>(G->getType());
+    Type *Ty = PtrTy->getElementType();
+    uint64_t SizeInBytes = TD->getTypeStoreSizeInBits(Ty) / 8;
+    uint64_t RightRedzoneSize = RedzoneSize +
+        (RedzoneSize - (SizeInBytes % RedzoneSize));
+    Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+
+    StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
+    Constant *NewInitializer = ConstantStruct::get(
+        NewTy, G->getInitializer(),
+        Constant::getNullValue(RightRedZoneTy), NULL);
+
+    GlobalVariable *Name = createPrivateGlobalForString(M, G->getName());
+
+    // Create a new global variable with enough space for a redzone.
+    GlobalVariable *NewGlobal = new GlobalVariable(
+        M, NewTy, G->isConstant(), G->getLinkage(),
+        NewInitializer, "", G, G->isThreadLocal());
+    NewGlobal->copyAttributesFrom(G);
+    NewGlobal->setAlignment(RedzoneSize);
+
+    Value *Indices2[2];
+    Indices2[0] = IRB.getInt32(0);
+    Indices2[1] = IRB.getInt32(0);
+
+    G->replaceAllUsesWith(
+        ConstantExpr::getGetElementPtr(NewGlobal, Indices2, 2));
+    NewGlobal->takeName(G);
+    G->eraseFromParent();
+
+    Initializers[i] = ConstantStruct::get(
+        GlobalStructTy,
+        ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+        ConstantInt::get(IntptrTy, SizeInBytes),
+        ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+        ConstantExpr::getPointerCast(Name, IntptrTy),
+        NULL);
+    DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+  }
+
+  ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
+  GlobalVariable *AllGlobals = new GlobalVariable(
+      M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
+      ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+
+  Function *AsanRegisterGlobals = cast<Function>(M.getOrInsertFunction(
+      kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+  AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
+
+  IRB.CreateCall2(AsanRegisterGlobals,
+                  IRB.CreatePointerCast(AllGlobals, IntptrTy),
+                  ConstantInt::get(IntptrTy, n));
+
+  DEBUG(dbgs() << M);
+  return true;
+}
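A worked example of the redzone sizing used above. The helper name is invented, and RedzoneSize is taken to be 32 by assumption (the default for scale 3):

#include <cstdint>

// Pad each global so that the global plus its right redzone is a multiple of
// RedzoneSize; the redzone is always at least RedzoneSize bytes.
static uint64_t rightRedzoneSize(uint64_t SizeInBytes, uint64_t RedzoneSize) {
  return RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize));
}

// rightRedzoneSize(4, 32) == 60, so "int g;" is rewritten to the equivalent
// of { int g; char rz[60]; }: 64 bytes total, and the run-time registration
// call receives (beg, 4, 64, "g") for it.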
+// virtual
+bool AddressSanitizer::runOnModule(Module &M) {
+  // Initialize the private fields. No one has accessed them before.
+  TD = getAnalysisIfAvailable<TargetData>();
+  if (!TD)
+    return false;
+  BL.reset(new BlackList(ClBlackListFile));
+
+  CurrentModule = &M;
+  C = &(M.getContext());
+  LongSize = TD->getPointerSizeInBits();
+  IntptrTy = Type::getIntNTy(*C, LongSize);
+  IntptrPtrTy = PointerType::get(IntptrTy, 0);
+
+  AsanCtorFunction = Function::Create(
+      FunctionType::get(Type::getVoidTy(*C), false),
+      GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+  BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+  CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
+
+  // Call __asan_init in the module ctor.
+  IRBuilder<> IRB(CtorInsertBefore);
+  AsanInitFunction = cast<Function>(
+      M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+  AsanInitFunction->setLinkage(Function::ExternalLinkage);
+  IRB.CreateCall(AsanInitFunction);
+
+  MappingOffset = LongSize == 32
+      ? kDefaultShadowOffset32 : kDefaultShadowOffset64;
+  if (ClMappingOffsetLog >= 0) {
+    if (ClMappingOffsetLog == 0) {
+      // Special case.
+      MappingOffset = 0;
+    } else {
+      MappingOffset = 1ULL << ClMappingOffsetLog;
+    }
+  }
+  MappingScale = kDefaultShadowScale;
+  if (ClMappingScale) {
+    MappingScale = ClMappingScale;
+  }
+  // The redzone used for stack and globals is at least 32 bytes.
+  // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+  RedzoneSize = std::max(32, (int)(1 << MappingScale));
+
+  bool Res = false;
+
+  if (ClGlobals)
+    Res |= insertGlobalRedzones(M);
+
+  // Tell the run-time the current values of mapping offset and scale.
+  GlobalValue *asan_mapping_offset =
+      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+                         ConstantInt::get(IntptrTy, MappingOffset),
+                         kAsanMappingOffsetName);
+  GlobalValue *asan_mapping_scale =
+      new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+                         ConstantInt::get(IntptrTy, MappingScale),
+                         kAsanMappingScaleName);
+  // Read these globals, otherwise they may be optimized away.
+  IRB.CreateLoad(asan_mapping_scale, true);
+  IRB.CreateLoad(asan_mapping_offset, true);
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+    Res |= handleFunction(M, *F);
+  }
+
+  appendToGlobalCtors(M, AsanCtorFunction, 1 /*high priority*/);
+
+  return Res;
+}
+
+bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+  if (BL->isIn(F)) return false;
+  if (&F == AsanCtorFunction) return false;
+
+  if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
+    return false;
+  // We want to instrument every address only once per basic block
+  // (unless there are calls between uses).
+  SmallSet<Value*, 16> TempsToInstrument;
+  SmallVector<Instruction*, 16> ToInstrument;
+
+  // Fill the set of memory operations to instrument.
+  for (Function::iterator FI = F.begin(), FE = F.end();
+       FI != FE; ++FI) {
+    TempsToInstrument.clear();
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+         BI != BE; ++BI) {
+      if ((isa<LoadInst>(BI) && ClInstrumentReads) ||
+          (isa<StoreInst>(BI) && ClInstrumentWrites)) {
+        Value *Addr = getLDSTOperand(BI);
+        if (ClOpt && ClOptSameTemp) {
+          if (!TempsToInstrument.insert(Addr))
+            continue;  // We've seen this temp in the current BB.
+        }
+      } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+        // OK, take it.
+      } else {
+        if (isa<CallInst>(BI)) {
+          // A call inside BB.
+          TempsToInstrument.clear();
+        }
+        continue;
+      }
+      ToInstrument.push_back(BI);
+    }
+  }
+
+  // Instrument.
+  int NumInstrumented = 0;
+  for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
+    Instruction *Inst = ToInstrument[i];
+    if (ClDebugMin < 0 || ClDebugMax < 0 ||
+        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+      if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst))
+        instrumentMop(Inst);
+      else
+        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+    }
+    NumInstrumented++;
+  }
+
+  DEBUG(dbgs() << F);
+
+  bool ChangedStack = poisonStackInFunction(M, F);
+
+  // For each NSObject descendant having a +load method, this method is
+  // invoked by the ObjC runtime before any of the static constructors is
+  // called. Therefore we need to instrument such methods with a call to
+  // __asan_init at the beginning in order to initialize our runtime before
+  // any access to the shadow memory.
+  // We cannot just ignore these methods, because they may call other
+  // instrumented functions.
+ if (F.getName().find(" load]") != std::string::npos) { + IRBuilder<> IRB(F.begin()->begin()); + IRB.CreateCall(AsanInitFunction); + } + + return NumInstrumented > 0 || ChangedStack; +} + +static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) { + if (ShadowRedzoneSize == 1) return PoisonByte; + if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte; + if (ShadowRedzoneSize == 4) + return (PoisonByte << 24) + (PoisonByte << 16) + + (PoisonByte << 8) + (PoisonByte); + assert(0 && "ShadowRedzoneSize is either 1, 2 or 4"); + return 0; +} + +static void PoisonShadowPartialRightRedzone(uint8_t *Shadow, + size_t Size, + size_t RedzoneSize, + size_t ShadowGranularity, + uint8_t Magic) { + for (size_t i = 0; i < RedzoneSize; + i+= ShadowGranularity, Shadow++) { + if (i + ShadowGranularity <= Size) { + *Shadow = 0; // fully addressable + } else if (i >= Size) { + *Shadow = Magic; // unaddressable + } else { + *Shadow = Size - i; // first Size-i bytes are addressable + } + } +} + +void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, + IRBuilder<> IRB, + Value *ShadowBase, bool DoPoison) { + size_t ShadowRZSize = RedzoneSize >> MappingScale; + assert(ShadowRZSize >= 1 && ShadowRZSize <= 4); + Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8); + Type *RZPtrTy = PointerType::get(RZTy, 0); + + Value *PoisonLeft = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize)); + Value *PoisonMid = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize)); + Value *PoisonRight = ConstantInt::get(RZTy, + ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize)); + + // poison the first red zone. + IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy)); + + // poison all other red zones. + uint64_t Pos = RedzoneSize; + for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { + AllocaInst *AI = AllocaVec[i]; + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + uint64_t AlignedSize = getAlignedAllocaSize(AI); + assert(AlignedSize - SizeInBytes < RedzoneSize); + Value *Ptr = NULL; + + Pos += AlignedSize; + + assert(ShadowBase->getType() == IntptrTy); + if (SizeInBytes < AlignedSize) { + // Poison the partial redzone at right + Ptr = IRB.CreateAdd( + ShadowBase, ConstantInt::get(IntptrTy, + (Pos >> MappingScale) - ShadowRZSize)); + size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes); + uint32_t Poison = 0; + if (DoPoison) { + PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes, + RedzoneSize, + 1ULL << MappingScale, + kAsanStackPartialRedzoneMagic); + } + Value *PartialPoison = ConstantInt::get(RZTy, Poison); + IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); + } + + // Poison the full redzone at right. + Ptr = IRB.CreateAdd(ShadowBase, + ConstantInt::get(IntptrTy, Pos >> MappingScale)); + Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid; + IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy)); + + Pos += RedzoneSize; + } +} + +// Workaround for bug 11395: we don't want to instrument stack in functions +// with large assembly blobs (32-bit only), otherwise reg alloc may crash. +// FIXME: remove once the bug 11395 is fixed. 
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+  if (LongSize != 32) return false;
+  CallInst *CI = dyn_cast<CallInst>(I);
+  if (!CI || !CI->isInlineAsm()) return false;
+  if (CI->getNumArgOperands() <= 5) return false;
+  // We have inline assembly with quite a few arguments.
+  return true;
+}
+
+// Find all static Alloca instructions and put
+// poisoned red zones around all of them.
+// Then unpoison everything back before the function returns.
+//
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc.) and unpoison the
+// entire stack in the interceptor. This however does not work inside the
+// actual function which catches the exception. Most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existing bug on 453.povray.
+// It sounds like an LLVM bug.
+bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+  if (!ClStack) return false;
+  SmallVector<AllocaInst*, 16> AllocaVec;
+  SmallVector<Instruction*, 8> RetVec;
+  uint64_t TotalSize = 0;
+
+  // Filter out the Alloca instructions we want (and can) handle.
+  // Collect Ret instructions.
+  for (Function::iterator FI = F.begin(), FE = F.end();
+       FI != FE; ++FI) {
+    BasicBlock &BB = *FI;
+    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+         BI != BE; ++BI) {
+      if (LooksLikeCodeInBug11395(BI)) return false;
+      if (isa<ReturnInst>(BI)) {
+        RetVec.push_back(BI);
+        continue;
+      }
+
+      AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+      if (!AI) continue;
+      if (AI->isArrayAllocation()) continue;
+      if (!AI->isStaticAlloca()) continue;
+      if (!AI->getAllocatedType()->isSized()) continue;
+      if (AI->getAlignment() > RedzoneSize) continue;
+      AllocaVec.push_back(AI);
+      uint64_t AlignedSize = getAlignedAllocaSize(AI);
+      TotalSize += AlignedSize;
+    }
+  }
+
+  if (AllocaVec.empty()) return false;
+
+  uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
+
+  bool DoStackMalloc = ClUseAfterReturn
+      && LocalStackSize <= kMaxStackMallocSize;
+
+  Instruction *InsBefore = AllocaVec[0];
+  IRBuilder<> IRB(InsBefore);
+
+  Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
+  AllocaInst *MyAlloca =
+      new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+  MyAlloca->setAlignment(RedzoneSize);
+  assert(MyAlloca->isStaticAlloca());
+  Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
+  Value *LocalStackBase = OrigStackBase;
+
+  if (DoStackMalloc) {
+    Value *AsanStackMallocFunc = M.getOrInsertFunction(
+        kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
+    LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+        ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+  }
+
+  // This string will be parsed by the run-time (DescribeStackAddress).
+  SmallString<2048> StackDescriptionStorage;
+  raw_svector_ostream StackDescription(StackDescriptionStorage);
+  StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+
+  uint64_t Pos = RedzoneSize;
+  // Replace Alloca instructions with base+offset.
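// A sketch, not emitted code: with the default 32-byte redzone, a function
// "foo" with an 8-byte alloca 'a' and a 16-byte alloca 'b' gets a frame of
// LocalStackSize == 64 + 3 * 32 == 160 bytes, laid out as:
//   [  0.. 31] left redzone (frame magic + pointer to description string)
//   [ 32.. 63] 'a': 8 bytes used, 24 bytes of padding (partial redzone)
//   [ 64.. 95] middle redzone
//   [ 96..127] 'b': 16 bytes used, 16 bytes of padding
//   [128..159] right redzone
// The description string built below becomes "foo 2 32 8 1 a 96 16 1 b ",
// and the padding of 'a' is encoded in shadow memory by
// PoisonShadowPartialRightRedzone as 0x00 followed by three
// kAsanStackPartialRedzoneMagic bytes: only the first 8 bytes of that
// 32-byte slot remain addressable.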
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { + AllocaInst *AI = AllocaVec[i]; + uint64_t SizeInBytes = getAllocaSizeInBytes(AI); + StringRef Name = AI->getName(); + StackDescription << Pos << " " << SizeInBytes << " " + << Name.size() << " " << Name << " "; + uint64_t AlignedSize = getAlignedAllocaSize(AI); + assert((AlignedSize % RedzoneSize) == 0); + AI->replaceAllUsesWith( + IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)), + AI->getType())); + Pos += AlignedSize + RedzoneSize; + } + assert(Pos == LocalStackSize); + + // Write the Magic value and the frame description constant to the redzone. + Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); + IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), + BasePlus0); + Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, + ConstantInt::get(IntptrTy, LongSize/8)); + BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); + Value *Description = IRB.CreatePointerCast( + createPrivateGlobalForString(M, StackDescription.str()), + IntptrTy); + IRB.CreateStore(Description, BasePlus1); + + // Poison the stack redzones at the entry. + Value *ShadowBase = memToShadow(LocalStackBase, IRB); + PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true); + + Value *AsanStackFreeFunc = NULL; + if (DoStackMalloc) { + AsanStackFreeFunc = M.getOrInsertFunction( + kAsanStackFreeName, IRB.getVoidTy(), + IntptrTy, IntptrTy, IntptrTy, NULL); + } + + // Unpoison the stack before all ret instructions. + for (size_t i = 0, n = RetVec.size(); i < n; i++) { + Instruction *Ret = RetVec[i]; + IRBuilder<> IRBRet(Ret); + + // Mark the current frame as retired. + IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), + BasePlus0); + // Unpoison the stack. 
+    PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
+
+    if (DoStackMalloc) {
+      IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
+                         ConstantInt::get(IntptrTy, LocalStackSize),
+                         OrigStackBase);
+    }
+  }
+
+  if (ClDebugStack) {
+    DEBUG(dbgs() << F);
+  }
+
+  return true;
+}
+
+BlackList::BlackList(const std::string &Path) {
+  Functions = NULL;
+  const char *kFunPrefix = "fun:";
+  if (!Path.size()) return;
+  std::string Fun;
+
+  OwningPtr<MemoryBuffer> File;
+  if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
+    errs() << EC.message();
+    exit(1);
+  }
+  MemoryBuffer *Buff = File.take();
+  const char *Data = Buff->getBufferStart();
+  size_t DataLen = Buff->getBufferSize();
+  SmallVector<StringRef, 16> Lines;
+  SplitString(StringRef(Data, DataLen), Lines, "\n\r");
+  for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
+    if (Lines[i].startswith(kFunPrefix)) {
+      std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
+      if (Fun.size()) {
+        Fun += "|";
+      }
+      // Add ThisFunc, replacing each '*' wildcard with the regex ".*".
+      for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
+        if (ThisFunc[j] == '*')
+          Fun += '.';
+        Fun += ThisFunc[j];
+      }
+    }
+  }
+  if (Fun.size()) {
+    Functions = new Regex(Fun);
+  }
+}
+
+bool BlackList::isIn(const Function &F) {
+  if (Functions) {
+    bool Res = Functions->match(F.getName());
+    return Res;
+  }
+  return false;
+}
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 7b3a927..929b7cd 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMInstrumentation
+  AddressSanitizer.cpp
   EdgeProfiling.cpp
   GCOVProfiling.cpp
   Instrumentation.cpp
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 71adc1e..6d6e0ae 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,6 +24,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeOptimalEdgeProfilerPass(Registry);
   initializePathProfilerPass(Registry);
   initializeGCOVProfilerPass(Registry);
+  initializeAddressSanitizerPass(Registry);
 }
 
 /// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 62c21b8..1fe1254 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -69,7 +69,7 @@ inline static void printEdgeCounter(ProfileInfo::Edge e, BasicBlock* b, unsigned i) {
   DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
-               << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+               << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
 }
 
 bool OptimalEdgeProfiler::runOnModule(Module &M) {
@@ -127,7 +127,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
   unsigned i = 0;
   for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
     if (F->isDeclaration()) continue;
-    DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
+    DEBUG(dbgs() << "Working on " << F->getName() << "\n");
 
     // Calculate a Maximum Spanning Tree with the edge weights determined by
     // ProfileEstimator.
ProfileEstimator also assign weights to the virtual diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 23915d3..b214796 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -665,7 +665,7 @@ void BLInstrumentationDag::unlinkPhony() { // Generate a .dot graph to represent the DAG and pathNumbers void BLInstrumentationDag::generateDotGraph() { std::string errorInfo; - std::string functionName = getFunction().getNameStr(); + std::string functionName = getFunction().getName().str(); std::string filename = "pathdag." + functionName + ".dot"; DEBUG (dbgs() << "Writing '" << filename << "'...\n"); @@ -750,7 +750,8 @@ Value* BLInstrumentationNode::getStartingPathNumber(){ // Sets the Value of the pathNumber. Used by the instrumentation code. void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) { DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ? - pathNumber->getNameStr() : "unused") << "\n"); + pathNumber->getName() : + "unused") << "\n"); _startingPathNumber = pathNumber; } @@ -760,7 +761,7 @@ Value* BLInstrumentationNode::getEndingPathNumber(){ void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) { DEBUG(dbgs() << " EPN-" << getName() << " <-- " - << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n"); + << (pathNumber ? pathNumber->getName() : "unused") << "\n"); _endingPathNumber = pathNumber; } @@ -1239,9 +1240,9 @@ void PathProfiler::insertInstrumentation( insertPoint++; DEBUG(dbgs() << "\nInstrumenting method call block '" - << node->getBlock()->getNameStr() << "'\n"); + << node->getBlock()->getName() << "'\n"); DEBUG(dbgs() << " Path number initialized: " - << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n"); + << ((node->getStartingPathNumber()) ? 
"yes" : "no") << "\n"); Value* newpn; if( node->getStartingPathNumber() ) { @@ -1370,7 +1371,7 @@ bool PathProfiler::runOnModule(Module &M) { if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n"); + DEBUG(dbgs() << "Function: " << F->getName() << "\n"); functionNumber++; // set function number diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8f5f157..f5688cb 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -274,43 +274,35 @@ static Value *getStoredPointerOperand(Instruction *I) { } } -static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { +static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { const TargetData *TD = AA.getTargetData(); - if (CallInst *CI = dyn_cast<CallInst>(V)) { - assert(isMalloc(CI) && "Expected Malloc call!"); - if (ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) + if (const CallInst *CI = extractMallocCall(V)) { + if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0))) return C->getZExtValue(); - return AliasAnalysis::UnknownSize; } if (TD == 0) return AliasAnalysis::UnknownSize; - if (AllocaInst *A = dyn_cast<AllocaInst>(V)) { + if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) { // Get size information for the alloca - if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) + if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize())) return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType()); - return AliasAnalysis::UnknownSize; } - assert(isa<Argument>(V) && "Expected AllocaInst, malloc call or Argument!"); - PointerType *PT = cast<PointerType>(V->getType()); - return TD->getTypeAllocSize(PT->getElementType()); -} + if (const Argument *A = dyn_cast<Argument>(V)) { + if (A->hasByValAttr()) + if (PointerType *PT = dyn_cast<PointerType>(A->getType())) + return TD->getTypeAllocSize(PT->getElementType()); + } -/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is -/// pointing to an object with a pointer size we can trust. -static bool isObjectPointerWithTrustworthySize(const Value *V) { - if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) - return !AI->isArrayAllocation(); - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return !GV->mayBeOverridden(); - if (const Argument *A = dyn_cast<Argument>(V)) - return A->hasByValAttr(); - if (isMalloc(V)) - return true; - return false; + if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) { + if (!GV->mayBeOverridden()) + return TD->getTypeAllocSize(GV->getType()->getElementType()); + } + + return AliasAnalysis::UnknownSize; } namespace { @@ -329,8 +321,8 @@ namespace { static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, const AliasAnalysis::Location &Earlier, AliasAnalysis &AA, - int64_t& EarlierOff, - int64_t& LaterOff) { + int64_t &EarlierOff, + int64_t &LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -377,12 +369,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, return OverwriteUnknown; // If the "Later" store is to a recognizable object, get its size. 
- if (isObjectPointerWithTrustworthySize(UO2)) { - uint64_t ObjectSize = - TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType()); + uint64_t ObjectSize = getPointerSize(UO2, AA); + if (ObjectSize != AliasAnalysis::UnknownSize) if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) return OverwriteComplete; - } // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 3e122c2..4ae51d5 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3827,8 +3827,8 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // Remove any extra phis created by processing inner loops. SmallVector<WeakVH, 16> DeadInsts; SCEVExpander Rewriter(SE, "lsr"); - Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); - Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + Changed |= (bool)Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= (bool)DeleteTriviallyDeadInstructions(DeadInsts); } DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); return; @@ -3880,8 +3880,8 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) // Remove any extra phis created by processing inner loops. SmallVector<WeakVH, 16> DeadInsts; SCEVExpander Rewriter(SE, "lsr"); - Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); - Changed |= DeleteTriviallyDeadInstructions(DeadInsts); + Changed |= (bool)Rewriter.replaceCongruentIVs(L, &DT, DeadInsts); + Changed |= (bool)DeleteTriviallyDeadInstructions(DeadInsts); } } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 298d692..9e4f51f 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -950,7 +950,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) { RepeatInstruction = processMemMove(M); else if (CallSite CS = (Value*)I) { for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) - if (CS.paramHasAttr(i+1, Attribute::ByVal)) + if (CS.isByValArgument(i)) MadeChange |= processByValArgument(CS, i); } diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c12f403..4b14efc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -2534,13 +2534,12 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, // ignore it if we know that the value isn't captured. unsigned ArgNo = CS.getArgumentNo(UI); if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || - CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))) + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) continue; // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. 
- if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal)) + if (CS.isByValArgument(ArgNo)) continue; } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 2a00ae1..6e169de 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -963,8 +963,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B, - Callee->getAttributes()); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); return B.CreateFPExt(V, B.getDoubleTy()); } }; @@ -1324,7 +1323,7 @@ struct FPutsOpt : public LibCallOptimization { if (!Len) return 0; EmitFWrite(CI->getArgOperand(0), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD); + CI->getArgOperand(1), B, TD, TLI); return CI; // Known to have no uses (see above). } }; @@ -1352,7 +1351,7 @@ struct FPrintFOpt : public LibCallOptimization { EmitFWrite(CI->getArgOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getArgOperand(0), B, TD); + CI->getArgOperand(0), B, TD, TLI); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1374,7 +1373,7 @@ struct FPrintFOpt : public LibCallOptimization { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); + EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); return CI; } return 0; @@ -1470,6 +1469,7 @@ namespace { SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } + void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); void InitOptimizations(); bool runOnFunction(Function &F); @@ -1500,6 +1500,11 @@ FunctionPass *llvm::createSimplifyLibCallsPass() { return new SimplifyLibCalls(); } +void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { + if (TLI->has(F)) + Optimizations[TLI->getName(F)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. void SimplifyLibCalls::InitOptimizations() { @@ -1525,9 +1530,9 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strcspn"] = &StrCSpn; Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; - if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy; + AddOpt(LibFunc::memcpy, &MemCpy); Optimizations["memmove"] = &MemMove; - if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet; + AddOpt(LibFunc::memset, &MemSet); // _chk variants of String and Memory LibCall Optimizations. 
Optimizations["__strcpy_chk"] = &StrCpyChk; @@ -1580,8 +1585,8 @@ void SimplifyLibCalls::InitOptimizations() { // Formatting and IO Optimizations Optimizations["sprintf"] = &SPrintF; Optimizations["printf"] = &PrintF; - Optimizations["fwrite"] = &FWrite; - Optimizations["fputs"] = &FPuts; + AddOpt(LibFunc::fwrite, &FWrite); + AddOpt(LibFunc::fputs, &FPuts); Optimizations["fprintf"] = &FPrintF; Optimizations["puts"] = &Puts; } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 4b5f45b..a808303 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -15,11 +15,15 @@ #include "llvm/Type.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/LLVMContext.h" #include "llvm/Intrinsics.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, /// 'floor'). This function is known to take a single of type matching 'Op' and /// returns one value with the same type. If 'Op' is a long double, 'l' is /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, const AttrListPtr &Attrs) { - char NameBuffer[20]; +Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, + const AttrListPtr &Attrs) { + SmallString<20> NameBuffer; if (!Op->getType()->isDoubleTy()) { // If we need to add a suffix, copy into NameBuffer. - unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); + NameBuffer += Name; if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf + NameBuffer += 'f'; // floorf else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; + NameBuffer += 'l'; // floorl Name = NameBuffer; } @@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + StringRef FPutsName = TLI->getName(LibFunc::fputs); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3), B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); else - F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), NULL); CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. 
void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); LLVMContext &Context = B.GetInsertBlock()->getContext(); + StringRef FWriteName = TLI->getName(LibFunc::fwrite); Constant *F; if (File->getType()->isPointerTy()) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3), TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), File->getType(), NULL); else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context), B.getInt8PtrTy(), TD->getIntPtrType(Context), TD->getIntPtrType(Context), diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5464dbc..dd4a659 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -987,7 +987,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index db81de7..5e294a3 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/Support/IRBuilder.h" + using namespace llvm; void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 76289c0..6732a78 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -107,8 +107,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 - if (IVOperand->getNumOperands() != 2 || - !isa<ConstantInt>(IVOperand->getOperand(1))) + if (!isa<BinaryOperator>(IVOperand) + || !isa<ConstantInt>(IVOperand->getOperand(1))) return 0; IVSrc = IVOperand->getOperand(0); diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 9afc540..fca9466 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -25,8 +25,6 @@ using namespace llvm; // Pass Implementation // -Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { } - // Force out-of-line virtual method. 
Pass::~Pass() {
   delete Resolver;
@@ -48,7 +46,7 @@ bool Pass::mustPreserveAnalysisID(char &AID) const {
   return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
 }
 
-// dumpPassStructure - Implement the -debug-passes=Structure option
+// dumpPassStructure - Implement the -debug-pass=Structure option
 void Pass::dumpPassStructure(unsigned Offset) {
   dbgs().indent(Offset*2) << getPassName() << "\n";
 }
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 2fa5f08..291df91 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -66,7 +66,7 @@ Value::~Value() {
   // a <badref>
   //
   if (!use_empty()) {
-    dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+    dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
     for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
       dbgs() << "Use still stuck around after Def is destroyed:" << **I << "\n";
@@ -156,10 +156,6 @@ StringRef Value::getName() const {
   return Name->getKey();
 }
 
-std::string Value::getNameStr() const {
-  return getName().str();
-}
-
 void Value::setName(const Twine &NewName) {
   // Fast path for common IRBuilder case of setName("") when there is no name.
   if (NewName.isTriviallyEmpty() && !hasName())
@@ -554,7 +550,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
   // All callbacks, weak references, and assertingVHs should be dropped by now.
   if (V->HasValueHandle) {
 #ifndef NDEBUG      // Only in +Asserts mode...
-    dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+    dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
            << "\n";
     if (pImpl->ValueHandles[V]->getKind() == Assert)
       llvm_unreachable("An asserting value handle still pointed to this"
@@ -617,8 +613,8 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
     case Tracking:
     case Weak:
       dbgs() << "After RAUW from " << *Old->getType() << " %"
-             << Old->getNameStr() << " to " << *New->getType() << " %"
-             << New->getNameStr() << "\n";
+             << Old->getName() << " to " << *New->getType() << " %"
+             << New->getName() << "\n";
       llvm_unreachable("A tracking or weak value handle still pointed to the"
                        " old value!\n");
     default:
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index e13bd7d..089c259 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -139,7 +139,7 @@ std::string EVT::getEVTString() const {
   case MVT::v2f64:   return "v2f64";
   case MVT::v4f64:   return "v4f64";
   case MVT::Metadata:return "Metadata";
-  case MVT::untyped: return "untyped";
+  case MVT::Untyped: return "Untyped";
   }
 }