Diffstat (limited to 'lib'): 251 files changed, 10316 insertions, 6618 deletions
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 131a593..66e416c 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); - initializePrintDbgInfoPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index ca668b2..4139336 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 4c64c4a..597c767 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -13,7 +13,6 @@ add_llvm_library(LLVMAnalysis CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp - DbgInfoPrinter.cpp DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index d9c0299..a729270 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -18,7 +18,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" + using namespace llvm; CaptureTracker::~CaptureTracker() {} diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 44684a9..df70d15 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -189,6 +190,16 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 0); return -1; } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + SmallVector<Type*, 4> Tys; + for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) + Tys.push_back(II->getArgOperand(J)->getType()); + + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Tys); + } + return -1; default: // We don't have any information on this instruction. return -1; diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp deleted file mode 100644 index f674e0c..0000000 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ /dev/null @@ -1,224 +0,0 @@ -//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that prints instructions, and associated debug -// info: -// -// - source/line/col information -// - original variable name -// - original type name -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/DebugInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt<bool> -PrintDirectory("print-fullpath", - cl::desc("Print fullpath when printing debug info"), - cl::Hidden); - -namespace { - class PrintDbgInfo : public FunctionPass { - raw_ostream &Out; - void printVariableDeclaration(const Value *V); - public: - static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(ID), Out(errs()) { - initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; - char PrintDbgInfo::ID = 0; -} - -INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", - "Print debug info in human readable form", false, false) - -FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } - -/// Find the debug info descriptor corresponding to this global variable. -static Value *findDbgGlobalDeclare(GlobalVariable *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); - if (!DIG.isGlobalVariable()) - continue; - if (DIGlobalVariable(DIG).getGlobal() == V) - return DIG; - } - return 0; -} - -/// Find the debug info descriptor corresponding to this function. -static Value *findDbgSubprogramDeclare(Function *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i))); - if (!DIG.isSubprogram()) - continue; - if (DISubprogram(DIG).getFunction() == V) - return DIG; - } - return 0; -} - -/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. -/// It looks through pointer casts too. 
-static const DbgDeclareInst *findDbgDeclare(const Value *V) { - V = V->stripPointerCasts(); - - if (!isa<Instruction>(V) && !isa<Argument>(V)) - return 0; - - const Function *F = NULL; - if (const Instruction *I = dyn_cast<Instruction>(V)) - F = I->getParent()->getParent(); - else if (const Argument *A = dyn_cast<Argument>(V)) - F = A->getParent(); - - for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - if (DDI->getAddress() == V) - return DDI; - - return 0; -} - -static bool getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, - std::string &File, std::string &Dir) { - DICompileUnit Unit; - DIType TypeD; - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) { - Value *DIGV = findDbgGlobalDeclare(GV); - if (!DIGV) return false; - DIGlobalVariable Var(cast<MDNode>(DIGV)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))){ - Value *DIF = findDbgSubprogramDeclare(F); - if (!DIF) return false; - DISubprogram Var(cast<MDNode>(DIF)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else { - const DbgDeclareInst *DDI = findDbgDeclare(V); - if (!DDI) return false; - DIVariable Var(cast<MDNode>(DDI->getVariable())); - - StringRef D = Var.getName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } - - StringRef T = TypeD.getName(); - if (!T.empty()) - Type = T; - StringRef F = Unit.getFilename(); - if (!F.empty()) - File = F; - StringRef D = Unit.getDirectory(); - if (!D.empty()) - Dir = D; - return true; -} - -void PrintDbgInfo::printVariableDeclaration(const Value *V) { - std::string DisplayName, File, Directory, Type; - unsigned LineNo = 0; - - if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) - return; - - Out << "; "; - WriteAsOperand(Out, V, false, 0); - if (isa<Function>(V)) - Out << " is function " << DisplayName - << " of type " << Type << " declared at "; - else - Out << " is variable " << DisplayName - << " of type " << Type << " declared at "; - - if (PrintDirectory) - Out << Directory << "/"; - - Out << File << ":" << LineNo << "\n"; -} - -bool PrintDbgInfo::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - Out << "function " << F.getName() << "\n\n"; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *BB = I; - - if (I != F.begin() && (pred_begin(BB) == pred_end(BB))) - // Skip dead blocks. 
- continue; - - Out << BB->getName(); - Out << ":"; - - Out << "\n"; - - for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); - i != e; ++i) { - - printVariableDeclaration(i); - - if (const User *U = dyn_cast<User>(i)) { - for(unsigned i=0;i<U->getNumOperands();i++) - printVariableDeclaration(U->getOperand(i)); - } - } - } - return false; -} diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 3292e00..35c45e6 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -474,10 +474,12 @@ bool CallAnalyzer::visitCastInst(CastInst &I) { bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { Value *Operand = I.getOperand(0); - Constant *Ops[1] = { dyn_cast<Constant>(Operand) }; - if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) + Constant *COp = dyn_cast<Constant>(Operand); + if (!COp) + COp = SimplifiedValues.lookup(Operand); + if (COp) if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - Ops, TD)) { + COp, TD)) { SimplifiedValues[&I] = C; return true; } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 0fc0550..d490d54 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This family of functions identifies calls to builtin functions that allocate -// or free memory. +// or free memory. // //===----------------------------------------------------------------------===// @@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { + // Skip intrinsics + if (isa<IntrinsicInst>(V)) + return 0; + Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; @@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return NULL; + return 0; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) - return NULL; + return 0; unsigned ElementSize = TD->getTypeAllocSize(T); if (StructType *ST = dyn_cast<StructType>(T)) @@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = NULL; + Value *Multiple = 0; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return NULL; + return 0; } -/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. 
const CallInst *llvm::isArrayMalloc(const Value *I, @@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const CallInst *CI = extractMallocCall(I, TLI); Value *ArraySize = computeArraySize(CI, TD, TLI); - if (ArraySize && - ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - return CI; + if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize)) + if (ConstSize->isOne()) + return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return NULL; + return 0; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I, PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = NULL; + + PointerType *MallocType = 0; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast<PointerType>(CI->getType()); // Type could not be determined. - return NULL; + return 0; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : NULL; + return PT ? PT->getElementType() : 0; } -/// getMallocArraySize - Returns the array size of a malloc call. If the +/// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be @@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast<CallInst>(I); - if (!CI) + if (!CI || isa<IntrinsicInst>(CI)) return 0; Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration()) @@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return 0; // Check free prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) @@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } +/// \brief Compute the size of the underlying object pointed by Ptr. Returns +/// true and the object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. 
+bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr)); + if (!Visitor.knownSize(Data)) + return false; + + Size = Data.first.getZExtValue(); + return true; +} + STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -385,16 +409,23 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); - if (Instruction *I = dyn_cast<Instruction>(V)) { - // If we have already seen this instruction, bail out. Cycles can happen in - // unreachable code after constant propagation. - if (!SeenInsts.insert(I)) - return unknown(); + if (isa<Instruction>(V) || isa<GEPOperator>(V)) { + // Return cached value or insert unknown in cache if size of V was not + // computed yet in order to avoid recursions in PHis. + std::pair<CacheMapTy::iterator, bool> CacheVal = + CacheMap.insert(std::make_pair(V, unknown())); + if (!CacheVal.second) + return CacheVal.first->second; + + SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - return visitGEPOperator(*GEP); - return visit(*I); + Result = visitGEPOperator(*GEP); + else + Result = visit(cast<Instruction>(*V)); + return CacheMap[V] = Result; } + if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) @@ -408,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless - if (CE->getOpcode() == Instruction::GetElementPtr) - return visitGEPOperator(cast<GEPOperator>(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -543,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { - // too complex to analyze statically. - return unknown(); +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { + if (PHI.getNumIncomingValues() == 0) + return unknown(); + + SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); + if (!bothKnown(Ret)) + return unknown(); + + // Verify that all PHI incoming pointers have the same size and offset. + for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { + SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); + if (!bothKnown(EdgeData) || EdgeData != Ret) + return unknown(); + } + return Ret; } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 38bf5dd..1faa046 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -351,15 +351,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases -/// with reads from read-only locations. +/// with reads from read-only locations. 
If possible, pass the query +/// instruction as well; this function may take advantage of the metadata +/// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, - BasicBlock::iterator ScanIt, BasicBlock *BB) { + BasicBlock::iterator ScanIt, BasicBlock *BB, + Instruction *QueryInst) { const Value *MemLocBase = 0; int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; + bool isInvariantLoad = false; + if (isLoad && QueryInst) { + LoadInst *LI = dyn_cast<LoadInst>(QueryInst); + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + isInvariantLoad = true; + } // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -474,6 +482,8 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, continue; if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; return MemDepResult::getClobber(Inst); } @@ -571,7 +581,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent); + QueryParent, QueryInst); } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 72421a0..976cd87 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { case Instruction::PtrToInt: // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. - if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && - OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) return TCC_Free; // Otherwise it's not a no-op. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 8e3994e..45dcc5e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // Check for pointer simplifications. if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, TD, Depth)) return true; @@ -1396,10 +1398,10 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return true; // (add x, 0.0) is guaranteed to return +0.0, not -0.0. - if (I->getOpcode() == Instruction::FAdd && - isa<ConstantFP>(I->getOperand(1)) && - cast<ConstantFP>(I->getOperand(1))->isNullValue()) - return true; + if (I->getOpcode() == Instruction::FAdd) + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(1))) + if (CFP->isNullValue()) + return true; // sitofp and uitofp turn into +0.0 for zero. 
if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I)) diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index c7abf7a..dd7282c 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" using namespace llvm; /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 4ded281..bbb0432 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,6 +112,17 @@ DIE::~DIE() { delete Children[i]; } +/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. +DIE *DIE::getCompileUnit() const{ + DIE *p = getParent(); + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit) + return p; + p = p->getParent(); + } + llvm_unreachable("We should not have orphaned DIEs."); +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 35d7959..d087c54 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -152,6 +152,9 @@ namespace llvm { const std::vector<DIE *> &getChildren() const { return Children; } const SmallVector<DIEValue*, 32> &getValues() const { return Values; } DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile unit DIE this DIE belongs + /// to. + DIE *getCompileUnit() const; void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -232,9 +235,10 @@ namespace llvm { /// static unsigned BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { - if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; - if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; - if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; } else { if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 93b00fb..1c743c2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -35,7 +35,7 @@ using namespace llvm; CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0) { + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -241,7 +241,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { if (Line == 0) return; unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory()); + V.getContext().getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -257,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory()); 
+ unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -276,7 +278,7 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { return; unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), - SP.getDirectory()); + SP.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -293,7 +295,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { if (Line == 0) return; unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory()); + Ty.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -311,7 +313,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { return; DIFile File = Ty.getFile(); unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), - File.getDirectory()); + File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -329,7 +331,8 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -1348,9 +1351,19 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { } // Add linkage name. StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + if (!LinkageName.empty()) { + // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: + // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and + // TAG_variable. + addString(IsStaticMember && VariableSpecDIE ? + VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); + // In compatibility mode with older gdbs we put the linkage name on both + // the TAG_variable DIE and on the TAG_member DIE. + if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + } } else if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(GV.getConstant())) { // AT_const_value was added when the static memeber was created. To avoid @@ -1659,33 +1672,6 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); - // This is only for backward compatibility. 
- StringRef PropertyName = DT.getObjCPropertyName(); - if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - StringRef GetterName = DT.getObjCPropertyGetterName(); - if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = DT.getObjCPropertySetterName(); - if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (DT.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (DT.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (DT.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (DT.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (DT.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (DT.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) - addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); - } return MemberDie; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 77bf6a9..2b180c6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -87,6 +87,9 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; + /// getLowerBoundDefault - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; @@ -103,6 +106,7 @@ public: unsigned getUniqueID() const { return UniqueID; } unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } @@ -120,6 +124,7 @@ public: return AccelTypes; } + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 87659ef..b169602 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); + if (AbsSPDIE) { + bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); + // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of + // DW_FORM_ref4. SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + InSameCU ? 
dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + AbsSPDIE); SPCU->addDie(SPDie); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -528,7 +533,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DILocation DL(Scope->getInlinedAt()); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, - getOrCreateSourceID(DL.getFilename(), DL.getDirectory())); + getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), + TheCU->getUniqueID())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed @@ -617,19 +623,28 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // SourceIds map. This can update DirectoryNames and SourceFileNames maps // as well. unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, - StringRef DirName) { + StringRef DirName, unsigned CUID) { + // If we use .loc in assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + if (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + CUID = 0; + // If FE did not provide a file name, then assume stdin. if (FileName.empty()) - return getOrCreateSourceID("<stdin>", StringRef()); + return getOrCreateSourceID("<stdin>", StringRef(), CUID); // TODO: this might not belong here. See if we can factor this better. if (DirName == CompilationDir) DirName = ""; - unsigned SrcId = SourceIdMap.size()+1; + // FileIDCUMap stores the current ID for the given compile unit. + unsigned SrcId = FileIDCUMap[CUID] + 1; - // We look up the file/dir pair by concatenating them with a zero byte. + // We look up the CUID/file/dir by concatenating them with a zero byte. SmallString<128> NamePair; + NamePair += CUID; + NamePair += '\0'; NamePair += DirName; NamePair += '\0'; // Zero bytes are not allowed in paths. NamePair += FileName; @@ -638,8 +653,9 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, if (Ent.getValue() != SrcId) return Ent.getValue(); + FileIDCUMap[CUID] = SrcId; // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID); return SrcId; } @@ -650,14 +666,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); - // Call this to emit a .file directive if it wasn't emitted for the source - // file this CU comes from yet. - getOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, DIUnit.getLanguage(), Die, Asm, this, &InfoHolder); + + FileIDCUMap[NewCU->getUniqueID()] = 0; + // Call this to emit a .file directive if it wasn't emitted for the source + // file this CU comes from yet. + getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID()); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); @@ -742,82 +761,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, TheCU->addGlobalName(SP.getName(), SubprogramDie); } -// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty. 
-void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) { - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); - if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) - constructSubprogramDIE(CU, N); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); - if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) - CU->createGlobalVariableDIE(N); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) - CU->getOrCreateTypeDIE(Ty); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) - CU->getOrCreateTypeDIE(Ty); - } -} - -// Collect debug info using DebugInfoFinder. -// FIXME - Remove this when dragonegg switches to DIBuilder. -bool DwarfDebug::collectLegacyDebugInfo(const Module *M) { - DebugInfoFinder DbgFinder; - DbgFinder.processModule(*M); - - bool HasDebugInfo = false; - // Scan all the compile-units to see if there are any marked as the main - // unit. If not, we do not generate debug info. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - if (DICompileUnit(*I).isMain()) { - HasDebugInfo = true; - break; - } - } - if (!HasDebugInfo) return false; - - // Emit initial sections so we can refer to them later. - emitSectionLabels(); - - // Create all the compile unit DIEs. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) - constructCompileUnit(*I); - - // Create DIEs for each global variable. - for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) { - const MDNode *N = *I; - if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) - CU->createGlobalVariableDIE(N); - } - - // Create DIEs for each subprogram. - for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { - const MDNode *N = *I; - if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) - constructSubprogramDIE(CU, N); - } - - return HasDebugInfo; -} - // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. @@ -830,30 +773,28 @@ void DwarfDebug::beginModule() { // If module has named metadata anchors then use them, otherwise scan the // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (CU_Nodes) { - // Emit initial sections so we can reference labels later. 
- emitSectionLabels(); - - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CUNode(CU_Nodes->getOperand(i)); - CompileUnit *CU = constructCompileUnit(CUNode); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - constructSubprogramDIE(CU, SPs.getElement(i)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); - } - } else if (!collectLegacyDebugInfo(M)) + if (!CU_Nodes) return; - collectInfoFromNamedMDNodes(M); + // Emit initial sections so we can reference labels later. + emitSectionLabels(); + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CUNode(CU_Nodes->getOperand(i)); + CompileUnit *CU = constructCompileUnit(CUNode); + DIArray GVs = CUNode.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) + CU->createGlobalVariableDIE(GVs.getElement(i)); + DIArray SPs = CUNode.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + constructSubprogramDIE(CU, SPs.getElement(i)); + DIArray EnumTypes = CUNode.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); + DIArray RetainedTypes = CUNode.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); @@ -1197,16 +1138,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) Scope = LScopes.getCurrentFunctionScope(); - else { - if (DV.getVersion() <= LLVMDebugVersion9) - Scope = LScopes.findLexicalScope(MInsn->getDebugLoc()); - else { - if (MDNode *IA = DV.getInlinedAt()) - Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); - else - Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1))); - } - } + else if (MDNode *IA = DV.getInlinedAt()) + Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); + else + Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1))); // If variable scope is not found then skip this variable. if (!Scope) continue; @@ -1707,7 +1642,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, } else llvm_unreachable("Unexpected scope info"); - Src = getOrCreateSourceID(Fn, Dir); + Src = getOrCreateSourceID(Fn, Dir, + Asm->OutStreamer.getContext().getDwarfCompileUnitID()); } Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); } @@ -1761,15 +1697,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Compute the size and offset of all the DIEs. void DwarfUnits::computeSizeAndOffsets() { + // Offset from the beginning of debug info section. 
+ unsigned AccuOffset = 0; for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { + (*I)->setDebugInfoOffset(AccuOffset); unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - computeSizeAndOffset((*I)->getCUDie(), Offset); + unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + AccuOffset += EndOffset; } } @@ -1843,6 +1783,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); + if (Form == dwarf::DW_FORM_ref_addr) { + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Origin->getOffset() returns the offset from start of the + // compile unit. + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Addr += Holder.getCUOffset(Origin->getCompileUnit()); + } Asm->EmitInt32(Addr); break; } @@ -1940,6 +1887,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } +/// For a given compile unit DIE, returns offset from beginning of debug info. +unsigned DwarfUnits::getCUOffset(DIE *Die) { + assert(Die->getTag() == dwarf::DW_TAG_compile_unit && + "Input DIE should be compile unit in getCUOffset."); + for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + if (TheCU->getCUDie() == Die) + return TheCU->getDebugInfoOffset(); + } + llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); +} + // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7b56815..81e345e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -274,6 +274,10 @@ public: /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } + + /// \brief for a given compile unit DIE, returns offset from beginning of + /// debug info. + unsigned getCUOffset(DIE *Die); }; /// \brief Collects and handles dwarf debug information. @@ -305,7 +309,9 @@ class DwarfDebug { // A list of all the unique abbreviations in use. std::vector<DIEAbbrev *> Abbreviations; - // Source id map, i.e. pair of source filename and directory, + // Stores the current file ID for a given compile unit. + DenseMap <unsigned, unsigned> FileIDCUMap; + // Source id map, i.e. CUID, source filename and directory, // separated by a zero byte, mapped to a unique id. StringMap<unsigned, BumpPtrAllocator&> SourceIdMap; @@ -596,14 +602,6 @@ public: DwarfDebug(AsmPrinter *A, Module *M); ~DwarfDebug(); - /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum - /// and llvm.dbg.ty - void collectInfoFromNamedMDNodes(const Module *M); - - /// \brief Collect debug info using DebugInfoFinder. - /// FIXME - Remove this when DragonEgg switches to DIBuilder. - bool collectLegacyDebugInfo(const Module *M); - /// \brief Emit all Dwarf sections that should come prior to the /// content. void beginModule(); @@ -626,7 +624,8 @@ public: /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. 
- unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName); + unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName, + unsigned CUID); /// \brief Recursively Emits a debug information entry. void emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs); diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index e8b5b4f..4cd1b80 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -379,22 +379,77 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys) const { - // assume that we need to scalarize this intrinsic. - unsigned ScalarizationCost = 0; - unsigned ScalarCalls = 1; - if (RetTy->isVectorTy()) { - ScalarizationCost = getScalarizationOverhead(RetTy, true, false); - ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); - } - for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { - if (Tys[i]->isVectorTy()) { - ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + unsigned ISD = 0; + switch (IID) { + default: { + // Assume that we need to scalarize this intrinsic. + unsigned ScalarizationCost = 0; + unsigned ScalarCalls = 1; + if (RetTy->isVectorTy()) { + ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); } + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { + if (Tys[i]->isVectorTy()) { + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); + } + } + + return ScalarCalls + ScalarizationCost; + } + // Look for intrinsics that can be lowered directly or turned into a scalar + // intrinsic call. + case Intrinsic::sqrt: ISD = ISD::FSQRT; break; + case Intrinsic::sin: ISD = ISD::FSIN; break; + case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::exp: ISD = ISD::FEXP; break; + case Intrinsic::exp2: ISD = ISD::FEXP2; break; + case Intrinsic::log: ISD = ISD::FLOG; break; + case Intrinsic::log10: ISD = ISD::FLOG10; break; + case Intrinsic::log2: ISD = ISD::FLOG2; break; + case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::floor: ISD = ISD::FFLOOR; break; + case Intrinsic::ceil: ISD = ISD::FCEIL; break; + case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::pow: ISD = ISD::FPOW; break; + case Intrinsic::fma: ISD = ISD::FMA; break; + case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + } + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that thre is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; } - return ScalarCalls + ScalarizationCost; + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // thare the code is twice as expensive. + return LT.first * 2; + } + + // Else, assume that we need to scalarize this intrinsic. For math builtins + // this will emit a costly libcall, adding call overhead and spills. Make it + // very expensive. 
+ if (RetTy->isVectorTy()) { + unsigned Num = RetTy->getVectorNumElements(); + unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), + Tys); + return 10 * Cost * Num; + } + + // This is going to be turned into a library call, make it expensive. + return 10; } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index fac207e..5447df0 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; DEBUG(dbgs() << "If-converting " << *PI.PHI); - assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); @@ -593,6 +592,7 @@ public: EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const; bool runOnMachineFunction(MachineFunction &MF); + const char *getPassName() const { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 22b35d5..f1b8394 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -972,9 +972,9 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(unsigned Reg) { - SlotIndex LastUse = NewIdx; if (TargetRegisterInfo::isVirtualRegister(Reg)) { + SlotIndex LastUse = NewIdx; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg), UE = MRI.use_nodbg_end(); @@ -984,30 +984,42 @@ private: if (InstSlot > LastUse && InstSlot < OldIdx) LastUse = InstSlot; } - } else { - MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); - MachineBasicBlock::iterator MII(MI); - ++MII; - MachineBasicBlock* MBB = MI->getParent(); - for (; MII != MBB->end(); ++MII){ - if (MII->isDebugValue()) - continue; - if (LIS.getInstructionIndex(MII) < OldIdx) - break; - for (MachineInstr::mop_iterator MOI = MII->operands_begin(), - MOE = MII->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& mop = *MOI; - if (!mop.isReg() || mop.getReg() == 0 || - TargetRegisterInfo::isVirtualRegister(mop.getReg())) - continue; - - if (TRI.hasRegUnit(mop.getReg(), Reg)) - LastUse = LIS.getInstructionIndex(MII); - } - } + return LastUse; + } + + // This is a regunit interval, so scanning the use list could be very + // expensive. Scan upwards from OldIdx instead. + assert(NewIdx < OldIdx && "Expected upwards move"); + SlotIndexes *Indexes = LIS.getSlotIndexes(); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + + // OldIdx may not correspond to an instruction any longer, so set MII to + // point to the next instruction after OldIdx, or MBB->end(). + MachineBasicBlock::iterator MII = MBB->end(); + if (MachineInstr *MI = Indexes->getInstructionFromIndex( + Indexes->getNextNonNullIndex(OldIdx))) + if (MI->getParent() == MBB) + MII = MI; + + MachineBasicBlock::iterator Begin = MBB->begin(); + while (MII != Begin) { + if ((--MII)->isDebugValue()) + continue; + SlotIndex Idx = Indexes->getInstructionIndex(MII); + + // Stop searching when NewIdx is reached. + if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) + return NewIdx; + + // Check if MII uses Reg. 
+ for (MIBundleOperands MO(MII); MO.isValid(); ++MO) + if (MO->isReg() && + TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + TRI.hasRegUnit(MO->getReg(), Reg)) + return Idx; } - return LastUse; + // Didn't reach NewIdx. It must be the first instruction in the block. + return NewIdx; } }; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5e04f2d..04321f3 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. 
+ StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index a777f52..1af00e8 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a93d070..c872355 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -57,6 +59,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, cl::desc("Enable scheduling for macro fusion."), cl::init(true)); +static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; @@ -197,6 +202,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis<LiveIntervals>(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + if (VerifyScheduling) { + DEBUG(LIS->print(dbgs())); + MF->verify(this, "Before machine scheduling."); + } RegClassInfo->runOnMachineFunction(*MF); // Select the scheduler, or set the default. @@ -285,6 +294,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } Scheduler->finalizeSchedule(); DEBUG(LIS->print(dbgs())); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); return true; } diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f77a7b1..5bf0176 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -677,7 +677,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.isEarlierInSameTrace(TBI)) + if (!DefTBI.isUsefulDominator(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -740,7 +740,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. 
- if (!DepTBI.isEarlierInSameTrace(TBI)) + if (!DepTBI.isUsefulDominator(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index b79f9f9..6e1cad3 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -25,7 +25,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index b18d52d..855a8c5 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -38,7 +38,6 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <climits> @@ -102,7 +101,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 3053119..0b6dc68 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -34,7 +34,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include <cstdlib> #include <queue> diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6344a73..6d84176 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -41,7 +41,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" #include <queue> using namespace llvm; diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index e2488ad..d85646d 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -37,7 +37,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 45b4f68..07e5b47 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -329,8 +329,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; - dbgs() << " Depth : " << Depth << "\n"; - dbgs() << " Height : " << Height << "\n"; + dbgs() << " Depth : " << getDepth() << "\n"; + dbgs() << " Height : " << getHeight() << "\n"; if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; @@ -367,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + 
dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ec52d7e..61603e1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, DAG.getSetCC(N->getDebugLoc(), TLI.getSetCCResultType(VT), @@ -6709,7 +6709,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && - TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + TLI.isOperationLegalOrCustom(ISD::BR_CC, + N1.getOperand(0).getValueType())) { return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ff9b2ba..10e2dc6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -696,6 +696,13 @@ bool FastISel::SelectCall(const User *I) { UpdateValueMap(Call, ResultReg); return true; } + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; + } } // Usually, it does not make sense to initialize a value, @@ -822,7 +829,7 @@ FastISel::SelectInstruction(const Instruction *I) { } // First, try doing target-independent selection. - if (!SkipTargetIndependentFastISel() && SelectOperator(I, I->getOpcode())) { + if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; DL = DebugLoc(); return true; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f085e44..51cc254 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3632,8 +3632,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(VT.isVector())) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + Tmp3); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 1ee2192..6a05cf8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { return VT == MVT::f32 ? 
Call_F32 : VT == MVT::f64 ? Call_F64 : VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : VT == MVT::ppcf128 ? Call_PPCF128 : RTLIB::UNKNOWN_LIBCALL; } @@ -156,6 +158,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, + RTLIB::ADD_F128, RTLIB::ADD_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -167,6 +170,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, + RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -220,6 +224,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, + RTLIB::COS_F128, RTLIB::COS_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -232,6 +237,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, + RTLIB::DIV_F128, RTLIB::DIV_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -243,6 +249,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, + RTLIB::EXP_F128, RTLIB::EXP_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -254,6 +261,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, + RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -265,6 +273,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -276,6 +285,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, + RTLIB::LOG_F128, RTLIB::LOG_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -287,6 +297,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, + RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -298,6 +309,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, + RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -311,6 +323,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, + RTLIB::FMA_F128, RTLIB::FMA_PPCF128), NVT, Ops, 3, false, N->getDebugLoc()); } @@ -323,6 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, + RTLIB::MUL_F128, RTLIB::MUL_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -334,6 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -347,6 +362,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -384,6 +400,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, + RTLIB::POW_F128, RTLIB::POW_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -397,6 +414,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, + RTLIB::POWI_F128, RTLIB::POWI_PPCF128), NVT, Ops, 2, false, 
N->getDebugLoc()); } @@ -409,6 +427,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, + RTLIB::REM_F128, RTLIB::REM_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -420,6 +439,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, + RTLIB::RINT_F128, RTLIB::RINT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -431,6 +451,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, + RTLIB::SIN_F128, RTLIB::SIN_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -442,6 +463,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, + RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -454,6 +476,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), NVT, Ops, 2, false, N->getDebugLoc()); } @@ -465,6 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), NVT, &Op, 1, false, N->getDebugLoc()); } @@ -839,7 +863,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -848,7 +873,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -859,6 +885,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, RTLIB::COPYSIGN_F32, RTLIB::COPYSIGN_F64, RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, RTLIB::COPYSIGN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -868,7 +895,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_PPCF128), + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -880,6 +908,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, + RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -890,7 +919,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -899,7 +929,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -907,8 +938,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, void 
DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -917,7 +949,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -926,7 +959,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -934,8 +968,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32,RTLIB::LOG10_F64, - RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -947,6 +982,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, + RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, N->getDebugLoc()); @@ -960,6 +996,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, + RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -972,6 +1009,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -997,7 +1035,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_PPCF128), + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1006,7 +1045,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1015,7 +1055,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1024,7 +1065,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1033,7 
+1075,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1045,6 +1088,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, + RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, N->getDebugLoc()); @@ -1055,7 +1099,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 182b7f3..beeb6b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), - GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); + SDValue Res = GetPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -555,16 +556,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { // The input value must be properly sign extended. SDValue Res = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), - Res.getValueType(), Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. - EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { @@ -2101,8 +2103,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // have an illegal type. Fix that first by casting the operand, otherwise // the new SHL_PARTS operation would need further legalization. 
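The assert in the lines that follow makes the requirement precise: the shift-amount type must carry at least Log2_32_Ceil(bit width) bits, now measured on the scalar element type so vector shifts are covered as well. A small worked stand-in (not the real llvm::Log2_32_Ceil):

    // ceil(log2(N)) bits are needed to encode every shift amount 0..N-1
    // of an N-bit value; e.g. 6 bits for i64, 7 bits for i128.
    #include <cstdint>

    static unsigned log2Ceil(uint64_t N) {
      unsigned Bits = 0;
      while ((uint64_t(1) << Bits) < N)
        ++Bits;
      return Bits;                                  // log2Ceil(64) == 6
    }

    static bool shiftTypeIsWideEnough(unsigned ShiftTyBits, unsigned ValueTyBits) {
      return ShiftTyBits >= log2Ceil(ValueTyBits);  // an i8 amount easily covers i64
    }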
   SDValue ShiftOp = N->getOperand(1);
-  MVT ShiftTy = TLI.getShiftAmountTy(VT);
-  assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(VT.getSizeInBits()) &&
+  EVT ShiftTy = TLI.getShiftAmountTy(VT);
+  assert(ShiftTy.getScalarType().getSizeInBits() >=
+         Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
          "ShiftAmountTy is too small to cover the range of this type!");
   if (ShiftOp.getValueType() != ShiftTy)
     ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 7de42ea..27b3cf2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -530,6 +530,7 @@ private:
   // Vector Operand Scalarization: <1 x ty> -> ty.
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_BITCAST(SDNode *N);
+  SDValue ScalarizeVecOp_EXTEND(SDNode *N);
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 09a50d9..5ec8535 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::BITCAST:
       Res = ScalarizeVecOp_BITCAST(N);
       break;
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::SIGN_EXTEND:
+      Res = ScalarizeVecOp_EXTEND(N);
+      break;
     case ISD::CONCAT_VECTORS:
       Res = ScalarizeVecOp_CONCAT_VECTORS(N);
       break;
@@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
                      N->getValueType(0), Elt);
 }

+/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Extend the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) {
+  assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  SmallVector<SDValue, 1> Ops(1);
+  Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                       N->getValueType(0).getScalarType(), Elt);
+  // Revectorize the result so the types line up with what the uses of this
+  // expression expect.
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], 1);
+}
+
 /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
 /// use a BUILD_VECTOR instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index addfccb..c009cfc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -904,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
   SUnit *OldSU = Sequence.back();
   while (true) {
     Sequence.pop_back();
-    if (SU->isSucc(OldSU))
-      // Don't try to remove SU from AvailableQueue.
- SU->isAvailable = false; // FIXME: use ready cycle instead of height CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); @@ -1363,8 +1360,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - DEBUG(dbgs() << " Interfering reg " << TRI->getName(LRegs[0]) - << " SU #" << CurSU->NodeNum << '\n'); + DEBUG(dbgs() << " Interfering reg " << + (LRegs[0] == TRI->getNumRegs() ? "CallResource" + : TRI->getName(LRegs[0])) + << " SU #" << CurSU->NodeNum << '\n'); std::pair<LRegsMapT::iterator, bool> LRegsPair = LRegsMap.insert(std::make_pair(CurSU, LRegs)); if (LRegsPair.second) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index db8ae6e..35707e8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1518,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2912,6 +2912,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + assert((!VT.isVector() || VT == N2.getValueType()) && + "Vector shift amounts must be in the same as their first arg"); // Verify that the shift amount VT is bit enough to hold valid shift // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses @@ -4702,7 +4704,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector<EVT> &ResultTys, + ArrayRef<EVT> ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); @@ -5354,7 +5356,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, - const std::vector<EVT> &ResultTys, + ArrayRef<EVT> ResultTys, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); return getMachineNode(Opcode, dl, VTs, Ops, NumOps); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b8ab2a9..33d100e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2654,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
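getShiftAmountOperand() above coerces the amount only when the types differ: a wider-than-preferred operand is truncated, a narrower one is zero-extended, and vectors are left alone. The visitShift() hunk below applies the same rule on the IR path. A reduced model of the decision (plain bit widths instead of EVTs, purely illustrative):

    enum class Coercion { None, Truncate, ZeroExtend };

    // Mirror of the OpTy.bitsGT(ShTy) ? TRUNCATE : ZERO_EXTEND choice above.
    static Coercion classifyShiftAmount(unsigned OpBits, unsigned ShiftTyBits) {
      if (OpBits == ShiftTyBits)
        return Coercion::None;
      return OpBits > ShiftTyBits ? Coercion::Truncate : Coercion::ZeroExtend;
    }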
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -6161,6 +6161,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" " don't know how to handle tied " "indirect register inputs"); + report_fatal_error("Cannot handle indirect register inputs!"); } RegsForValue MatchedRegs; @@ -6589,9 +6590,7 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } -void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { - // If this is the entry block, emit arguments. - const Function &F = *LLVMBB->getParent(); +void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); const DataLayout *TD = TLI.getDataLayout(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index acae58c..c3b6276 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); ST.resetSubtargetFeatures(MF); + TM.resetTargetOptions(MF); DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -1031,13 +1032,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; - FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. @@ -1053,15 +1052,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (LLVMBB == &Fn.getEntryBlock()) { // Lower any arguments needed in this block if this is the entry block. if (!FastIS->LowerArguments()) { - + // Fast isel failed to lower these arguments if (EnableFastISelAbortArgs) - // The "fast" selector couldn't lower these arguments. For the - // purpose of debugging, just abort. llvm_unreachable("FastISel didn't lower all arguments"); - // Call target indepedent SDISel argument lowering code if the target - // specific routine is not successful. - LowerArguments(LLVMBB); + // Use SelectionDAG argument lowering + LowerArguments(Fn); CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); CodeGenAndEmitDAG(); @@ -1179,7 +1175,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } else { // Lower any arguments needed in this block if this is the entry block. 
if (LLVMBB == &Fn.getEntryBlock()) - LowerArguments(LLVMBB); + LowerArguments(Fn); } if (Begin != BI) @@ -1674,9 +1670,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - std::vector<EVT> VTs; - VTs.push_back(MVT::Other); - VTs.push_back(MVT::Glue); + EVT VTs[] = { MVT::Other, MVT::Glue }; SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -2610,11 +2604,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { - int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); - Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); + Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); - Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index b58bb85..3903743 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector<ReturnInst*, 16> Returns; - SmallVector<InvokeInst*, 16> Invokes; + SmallVector<ReturnInst*, 16> Returns; + SmallVector<InvokeInst*, 16> Invokes; SmallSetVector<LandingPadInst*, 16> LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (Function *Callee = II->getCalledFunction()) + if (Callee->isIntrinsic() && + Callee->getIntrinsicID() == Intrinsic::donothing) { + // Remove the NOP invoke. 
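The SelectAllBasicBlocks change above makes the entry-block fallback explicit: when FastISel cannot lower the formal arguments, the pass either aborts (under the abort-on-args option) or hands the whole function to SelectionDAG via LowerArguments(Fn). A control-flow sketch with placeholder types (not the real FastISel/SelectionDAGISel interfaces):

    #include <cstdlib>

    struct FastSelector { bool lowerArguments() { return false; } }; // pretend fast-isel failed
    struct DAGSelector  { void lowerArguments() {} };

    static void lowerEntryArguments(FastSelector &FastIS, DAGSelector &SDISel,
                                    bool AbortOnFailure) {
      if (FastIS.lowerArguments())
        return;                 // fast path handled every argument
      if (AbortOnFailure)
        std::abort();           // fail loudly for debugging instead of falling back
      SDISel.lowerArguments();  // otherwise use SelectionDAG argument lowering
    }

The SjLjEHPrepare hunk then continues below with the removal of the no-op invoke described in the preceding comment.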
+          BranchInst::Create(II->getNormalDest(), II);
+          II->eraseFromParent();
+          continue;
+        }
+
       Invokes.push_back(II);
       LPads.insert(II->getUnwindDest()->getLandingPadInst());
     } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f3be37c..fbef347 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -31,7 +31,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
 using namespace llvm;

 STATISTIC(NumFunProtected, "Number of functions protected");
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2a02f6a..3c34676 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -744,10 +744,17 @@ TargetLoweringBase::~TargetLoweringBase() {
   delete &TLOF;
 }

-MVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }

+EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const {
+  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
+  if (LHSTy.isVector())
+    return LHSTy;
+  return getScalarShiftAmountTy(LHSTy);
+}
+
 /// canOpTrap - Returns true if the operation can trap for the value type.
 /// VT must be a legal type.
 bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
@@ -905,6 +912,15 @@ void TargetLoweringBase::computeRegisterProperties() {
     ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
   }

+  // Decide how to handle f128. If the target does not have native f128 support,
+  // expand it to i128 and we will be generating soft float library calls.
+  if (!isTypeLegal(MVT::f128)) {
+    NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
+    RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
+    TransformToType[MVT::f128] = MVT::i128;
+    ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
+  }
+
   // Decide how to handle f64. If the target does not have native f64 support,
   // expand it to i64 and we will be generating soft float library calls.
   if (!isTypeLegal(MVT::f64)) {
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1170bf2..3bdca4c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -36,7 +36,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 using namespace dwarf;
@@ -743,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
     return ".text$";
   if (Kind.isBSS ())
     return ".bss$";
-  if (Kind.isThreadLocal())
-    return ".tls$";
+  if (Kind.isThreadLocal()) {
+    // 'LLVM' is just an arbitrary string to ensure that the section name gets
+    // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker.
+    return ".tls$LLVM";
+  }
   if (Kind.isWriteable())
     return ".data$";
   return ".rdata$";
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index f31f67d..783bfa1 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency(
     report_fatal_error(ss.str());
   }
 #endif
-  return DefMI->isTransient() ?
0 : 1; + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 26c5fe4..e6dfe10 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -47,7 +47,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 9954a29..fb591a8 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2094,7 +2094,7 @@ static void WriteMDNodeComment(const MDNode *Node, return; DIDescriptor Desc(Node); - if (Desc.getVersion() < LLVMDebugVersion11) + if (!Desc.Verify()) return; unsigned Tag = Desc.getTag(); diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h index cb2c55c..ad2670d 100644 --- a/lib/IR/AttributeImpl.h +++ b/lib/IR/AttributeImpl.h @@ -146,7 +146,7 @@ public: } static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) { ID.AddString(Kind); - ID.AddString(Values); + if (!Values.empty()) ID.AddString(Values); } // FIXME: Remove this! diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 6eb51f0..2d82891 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -645,9 +645,17 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) { AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const { + if (hasAttribute(Idx, Attr)) return *this; return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, + StringRef Kind) const { + llvm::AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Idx, AttributeSet::get(C, Idx, B)); +} + AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { if (!pImpl) return Attrs; @@ -699,6 +707,7 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const { + if (!hasAttribute(Idx, Attr)) return *this; return removeAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index a5a9d9f..bf93d4f 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -1971,21 +1971,30 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, } } - // Implement folding of: - // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*), - // i64 0, i64 0) - // To: i32* getelementptr ([3 x i32]* %X, i64 0, i64 0) + // Attempt to fold casts to the same type away. For example, folding: // + // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*), + // i64 0, i64 0) + // into: + // + // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0) + // + // Don't fold if the cast is changing address spaces. 
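The rewritten fold below only strips the bitcast when the source and destination pointers agree on both the array element type and the address space. A distilled version of that guard, with simplified stand-in types rather than the real PointerType/ArrayType API:

    // Fold getelementptr(bitcast(X)) -> getelementptr(X) only when nothing
    // about the pointee element type or the address space changes.
    struct ArrayPtrInfo { int ElementTypeID; unsigned AddrSpace; };

    static bool canFoldCastIntoGEP(const ArrayPtrInfo &Src, const ArrayPtrInfo &Dst) {
      return Src.ElementTypeID == Dst.ElementTypeID &&
             Src.AddrSpace == Dst.AddrSpace;
    }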
if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) { - if (PointerType *SPT = - dyn_cast<PointerType>(CE->getOperand(0)->getType())) - if (ArrayType *SAT = dyn_cast<ArrayType>(SPT->getElementType())) - if (ArrayType *CAT = - dyn_cast<ArrayType>(cast<PointerType>(C->getType())->getElementType())) - if (CAT->getElementType() == SAT->getElementType()) - return - ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0), - Idxs, inBounds); + PointerType *SrcPtrTy = + dyn_cast<PointerType>(CE->getOperand(0)->getType()); + PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType()); + if (SrcPtrTy && DstPtrTy) { + ArrayType *SrcArrayTy = + dyn_cast<ArrayType>(SrcPtrTy->getElementType()); + ArrayType *DstArrayTy = + dyn_cast<ArrayType>(DstPtrTy->getElementType()); + if (SrcArrayTy && DstArrayTy + && SrcArrayTy->getElementType() == DstArrayTy->getElementType() + && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace()) + return ConstantExpr::getGetElementPtr((Constant*)CE->getOperand(0), + Idxs, inBounds); + } } } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 8093a09..70f7e01 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -47,6 +47,19 @@ bool Constant::isNegativeZeroValue() const { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) return CFP->isZero() && CFP->isNegative(); + // Equivalent for a vector of -0.0's. + if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + return true; + + // However, vectors of zeroes which are floating point represent +0.0's. + if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this)) + if (const VectorType *VT = dyn_cast<VectorType>(CAZ->getType())) + if (VT->getElementType()->isFloatingPointTy()) + // As it's a CAZ, we know it's the zero bit-pattern (ie, +0.0) in each element. + return false; + // Otherwise, just use +0.0. return isNullValue(); } @@ -1416,9 +1429,8 @@ static inline Constant *getFoldedCast( LLVMContextImpl *pImpl = Ty->getContext().pImpl; - // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> argVec(1, C); - ExprMapKeyType Key(opc, argVec); + // Look up the constant in the table first to ensure uniqueness. + ExprMapKeyType Key(opc, C); return pImpl->ExprConstants.getOrCreate(Ty, Key); } @@ -1715,9 +1727,8 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2)) return FC; // Fold a few common cases. 
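The isNegativeZeroValue() change above leans on an IEEE-754 detail: -0.0 and +0.0 compare equal but differ in the sign bit, so an all-zero-bits aggregate is +0.0 and only a genuine -0.0 splat counts. A self-contained illustration:

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // -0.0 == 0.0 is true, so the sign has to be checked explicitly.
    static bool isNegativeZero(double D) {
      return D == 0.0 && std::signbit(D);          // true only for -0.0
    }

    // Equivalently, -0.0 is exactly the sign bit with every other bit clear,
    // which is why a zero-filled constant cannot represent it.
    static bool isNegativeZeroBits(double D) {
      std::uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      return Bits == (std::uint64_t(1) << 63);
    }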
- std::vector<Constant*> argVec(1, C1); - argVec.push_back(C2); - ExprMapKeyType Key(Opcode, argVec, 0, Flags); + Constant *ArgVec[] = { C1, C2 }; + ExprMapKeyType Key(Opcode, ArgVec, 0, Flags); LLVMContextImpl *pImpl = C1->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(C1->getType(), Key); @@ -1793,10 +1804,8 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) { if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2)) return SC; // Fold common cases - std::vector<Constant*> argVec(3, C); - argVec[1] = V1; - argVec[2] = V2; - ExprMapKeyType Key(Instruction::Select, argVec); + Constant *ArgVec[] = { C, V1, V2 }; + ExprMapKeyType Key(Instruction::Select, ArgVec); LLVMContextImpl *pImpl = C->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(V1->getType(), Key); @@ -1848,9 +1857,7 @@ ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) { return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec; - ArgVec.push_back(LHS); - ArgVec.push_back(RHS); + Constant *ArgVec[] = { LHS, RHS }; // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred); @@ -1871,9 +1878,7 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { return FC; // Fold a few common cases... // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec; - ArgVec.push_back(LHS); - ArgVec.push_back(RHS); + Constant *ArgVec[] = { LHS, RHS }; // Get the key type with both the opcode and predicate const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred); @@ -1895,9 +1900,8 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { return FC; // Fold a few common cases. // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec(1, Val); - ArgVec.push_back(Idx); - const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec); + Constant *ArgVec[] = { Val, Idx }; + const ExprMapKeyType Key(Instruction::ExtractElement, ArgVec); LLVMContextImpl *pImpl = Val->getContext().pImpl; Type *ReqTy = Val->getType()->getVectorElementType(); @@ -1916,10 +1920,8 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) return FC; // Fold a few common cases. 
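The ConstantExpr hunks above and below all make the same mechanical swap: a heap-allocating std::vector built with push_back is replaced by a stack array when the operand count is fixed. A reduced before/after sketch (Key is a stand-in for ExprMapKeyType, not the real type):

    #include <vector>

    struct Key {
      Key(unsigned Opcode, const int *Ops, unsigned NumOps) {
        (void)Opcode; (void)Ops; (void)NumOps;    // stand-in; real key hashes these
      }
    };

    static Key makeKeyOld(unsigned Opcode, int LHS, int RHS) {
      std::vector<int> Args;            // allocates
      Args.push_back(LHS);
      Args.push_back(RHS);
      return Key(Opcode, Args.data(), 2);
    }

    static Key makeKeyNew(unsigned Opcode, int LHS, int RHS) {
      int Args[] = { LHS, RHS };        // same contents, no allocation
      return Key(Opcode, Args, 2);
    }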
// Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec(1, Val); - ArgVec.push_back(Elt); - ArgVec.push_back(Idx); - const ExprMapKeyType Key(Instruction::InsertElement,ArgVec); + Constant *ArgVec[] = { Val, Elt, Idx }; + const ExprMapKeyType Key(Instruction::InsertElement, ArgVec); LLVMContextImpl *pImpl = Val->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(Val->getType(), Key); @@ -1938,10 +1940,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, Type *ShufTy = VectorType::get(EltTy, NElts); // Look up the constant in the table first to ensure uniqueness - std::vector<Constant*> ArgVec(1, V1); - ArgVec.push_back(V2); - ArgVec.push_back(Mask); - const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec); + Constant *ArgVec[] = { V1, V2, Mask }; + const ExprMapKeyType Key(Instruction::ShuffleVector, ArgVec); LLVMContextImpl *pImpl = ShufTy->getContext().pImpl; return pImpl->ExprConstants.getOrCreate(ShufTy, Key); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index f31e531..34921af 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -95,11 +95,8 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), Constant::getNullValue(Type::getInt32Ty(VMContext)), ConstantInt::get(Type::getInt32Ty(VMContext), Lang), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), + createFile(Filename, Directory), MDString::get(VMContext, Producer), - // isMain field can be removed when we remove the legacy debug info. - ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), @@ -119,13 +116,14 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, /// createFile - Create a file descriptor to hold debugging information /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { - assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); assert(!Filename.empty() && "Unable to create file without name"); - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_file_type), + Value *Pair[] = { MDString::get(VMContext, Filename), MDString::get(VMContext, Directory), - NULL // TheCU + }; + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_file_type), + MDNode::get(VMContext, Pair) }; return DIFile(MDNode::get(VMContext, Elts)); } @@ -487,7 +485,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIType DerivedFrom, DIArray Elements, MDNode *VTableHolder, MDNode *TemplateParams) { - // TAG_class_type is encoded in DICompositeType format. + assert((!Context || Context.Verify()) && + "createClassType should be called with a valid Context"); + // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), getNonCompileUnitScope(Context), @@ -504,7 +504,9 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, VTableHolder, TemplateParams }; - return DIType(MDNode::get(VMContext, Elts)); + DIType R(MDNode::get(VMContext, Elts)); + assert(R.Verify() && "createClassType should return a verifiable DIType"); + return R; } /// createStructType - Create debugging information entry for a struct. 
@@ -534,7 +536,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, VTableHolder, NULL, }; - return DICompositeType(MDNode::get(VMContext, Elts)); + DICompositeType R(MDNode::get(VMContext, Elts)); + assert(R.Verify() && "createStructType should return a verifiable DIType"); + return R; } /// createUnionType - Create debugging information entry for an union. @@ -766,6 +770,8 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) }; MDNode *Node = MDNode::getTemporary(VMContext, Elts); + assert(DIType(Node).Verify() && + "createForwardDecl result should be verifiable"); return DIType(Node); } @@ -846,6 +852,11 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, unsigned LineNo, DIType Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { + DIDescriptor Context(getNonCompileUnitScope(Scope)); + assert((!Context || Context.Verify()) && + "createLocalVariable should be called with a valid Context"); + assert(Ty.Verify() && + "createLocalVariable should be called with a valid type"); Value *Elts[] = { GetTagConstant(VMContext, Tag), getNonCompileUnitScope(Scope), @@ -865,6 +876,8 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn); FnLocals->addOperand(Node); } + assert(DIVariable(Node).Verify() && + "createLocalVariable should return a verifiable DIVariable"); return DIVariable(Node); } @@ -990,7 +1003,10 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; - return DINameSpace(MDNode::get(VMContext, Elts)); + DINameSpace R(MDNode::get(VMContext, Elts)); + assert(R.Verify() && + "createNameSpace should return a verifiable DINameSpace"); + return R; } /// createLexicalBlockFile - This creates a new MDNode that encapsulates @@ -1002,7 +1018,11 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, Scope, File }; - return DILexicalBlockFile(MDNode::get(VMContext, Elts)); + DILexicalBlockFile R(MDNode::get(VMContext, Elts)); + assert( + R.Verify() && + "createLexicalBlockFile should return a verifiable DILexicalBlockFile"); + return R; } DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, @@ -1017,7 +1037,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; - return DILexicalBlock(MDNode::get(VMContext, Elts)); + DILexicalBlock R(MDNode::get(VMContext, Elts)); + assert(R.Verify() && + "createLexicalBlock should return a verifiable DILexicalBlock"); + return R; } /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
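The DIBuilder hunks above all adopt the same construct-then-verify discipline: build the descriptor node, assert that it verifies, and only then hand it back to the caller. A generic sketch of the pattern with simplified stand-in types (not the real DIBuilder/DIDescriptor classes):

    #include <cassert>

    struct Descriptor {
      bool WellFormed = false;
      bool Verify() const { return WellFormed; }
    };

    static Descriptor createDescriptor(bool WellFormed) {
      Descriptor R;
      R.WellFormed = WellFormed;
      assert(R.Verify() && "create* helpers should return verifiable descriptors");
      return R;
    }

In exchange, malformed metadata is caught at the point of creation rather than much later, which is also why the Verify() implementations in the DebugInfo.cpp hunks below now check operand counts.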
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index f09de3a..4100c4f 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -438,6 +438,12 @@ DataLayout::~DataLayout() { delete static_cast<StructLayoutMap*>(LayoutMap); } +bool DataLayout::doFinalization(Module &M) { + delete static_cast<StructLayoutMap*>(LayoutMap); + LayoutMap = 0; + return false; +} + const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index a59fdcd..1a5454e 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -51,18 +51,43 @@ DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { } -StringRef -DIDescriptor::getStringField(unsigned Elt) const { - if (DbgNode == 0) - return StringRef(); +bool DIDescriptor::Verify() const { + return DbgNode && + (DIDerivedType(DbgNode).Verify() || + DICompositeType(DbgNode).Verify() || DIBasicType(DbgNode).Verify() || + DIVariable(DbgNode).Verify() || DISubprogram(DbgNode).Verify() || + DIGlobalVariable(DbgNode).Verify() || DIFile(DbgNode).Verify() || + DICompileUnit(DbgNode).Verify() || DINameSpace(DbgNode).Verify() || + DILexicalBlock(DbgNode).Verify() || + DILexicalBlockFile(DbgNode).Verify() || + DISubrange(DbgNode).Verify() || DIEnumerator(DbgNode).Verify() || + DIObjCProperty(DbgNode).Verify() || + DITemplateTypeParameter(DbgNode).Verify() || + DITemplateValueParameter(DbgNode).Verify()); +} + +static Value *getField(const MDNode *DbgNode, unsigned Elt) { + if (DbgNode == 0 || Elt >= DbgNode->getNumOperands()) + return 0; + return DbgNode->getOperand(Elt); +} - if (Elt < DbgNode->getNumOperands()) - if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt))) - return MDS->getString(); +static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { + if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt))) + return R; + return 0; +} +static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { + if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt))) + return MDS->getString(); return StringRef(); } +StringRef DIDescriptor::getStringField(unsigned Elt) const { + return ::getStringField(DbgNode, Elt); +} + uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { if (DbgNode == 0) return 0; @@ -135,17 +160,11 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) { } unsigned DIVariable::getNumAddrElements() const { - if (getVersion() <= LLVMDebugVersion8) - return DbgNode->getNumOperands()-6; - if (getVersion() == LLVMDebugVersion9) - return DbgNode->getNumOperands()-7; return DbgNode->getNumOperands()-8; } /// getInlinedAt - If this variable is inlined then return inline location. MDNode *DIVariable::getInlinedAt() const { - if (getVersion() <= LLVMDebugVersion9) - return NULL; return dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)); } @@ -392,31 +411,30 @@ bool DIType::isUnsignedDIType() { /// Verify - Verify that a compile unit is well formed. bool DICompileUnit::Verify() const { - if (!DbgNode) + if (!isCompileUnit()) return false; StringRef N = getFilename(); if (N.empty()) return false; // It is possible that directory and produce string is empty. - return true; + return DbgNode->getNumOperands() == 13; } /// Verify - Verify that an ObjC property is well formed. 
bool DIObjCProperty::Verify() const { - if (!DbgNode) + if (!isObjCProperty()) return false; - unsigned Tag = getTag(); - if (Tag != dwarf::DW_TAG_APPLE_property) return false; + DIType Ty = getType(); if (!Ty.Verify()) return false; // Don't worry about the rest of the strings for now. - return true; + return DbgNode->getNumOperands() == 8; } /// Verify - Verify that a type descriptor is well formed. bool DIType::Verify() const { - if (!DbgNode) + if (!isType()) return false; if (getContext() && !getContext().Verify()) return false; @@ -437,27 +455,28 @@ bool DIType::Verify() const { /// Verify - Verify that a basic type descriptor is well formed. bool DIBasicType::Verify() const { - return isBasicType(); + return isBasicType() && DbgNode->getNumOperands() == 10; } /// Verify - Verify that a derived type descriptor is well formed. bool DIDerivedType::Verify() const { - return isDerivedType(); + return isDerivedType() && DbgNode->getNumOperands() >= 10 && + DbgNode->getNumOperands() <= 14; } /// Verify - Verify that a composite type descriptor is well formed. bool DICompositeType::Verify() const { - if (!DbgNode) + if (!isCompositeType()) return false; if (getContext() && !getContext().Verify()) return false; - return true; + return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14; } /// Verify - Verify that a subprogram descriptor is well formed. bool DISubprogram::Verify() const { - if (!DbgNode) + if (!isSubprogram()) return false; if (getContext() && !getContext().Verify()) @@ -466,12 +485,12 @@ bool DISubprogram::Verify() const { DICompositeType Ty = getType(); if (!Ty.Verify()) return false; - return true; + return DbgNode->getNumOperands() == 21; } /// Verify - Verify that a global variable descriptor is well formed. bool DIGlobalVariable::Verify() const { - if (!DbgNode) + if (!isGlobalVariable()) return false; if (getDisplayName().empty()) @@ -487,12 +506,12 @@ bool DIGlobalVariable::Verify() const { if (!getGlobal() && !getConstant()) return false; - return true; + return DbgNode->getNumOperands() == 13; } /// Verify - Verify that a variable descriptor is well formed. bool DIVariable::Verify() const { - if (!DbgNode) + if (!isVariable()) return false; if (getContext() && !getContext().Verify()) @@ -502,7 +521,7 @@ bool DIVariable::Verify() const { if (!Ty.Verify()) return false; - return true; + return DbgNode->getNumOperands() >= 8; } /// Verify - Verify that a location descriptor is well formed. @@ -515,11 +534,44 @@ bool DILocation::Verify() const { /// Verify - Verify that a namespace descriptor is well formed. bool DINameSpace::Verify() const { - if (!DbgNode) - return false; - if (getName().empty()) + if (!isNameSpace()) return false; - return true; + return DbgNode->getNumOperands() == 5; +} + +/// \brief Verify that the file descriptor is well formed. +bool DIFile::Verify() const { + return isFile() && DbgNode->getNumOperands() == 2; +} + +/// \brief Verify that the enumerator descriptor is well formed. +bool DIEnumerator::Verify() const { + return isEnumerator() && DbgNode->getNumOperands() == 3; +} + +/// \brief Verify that the subrange descriptor is well formed. +bool DISubrange::Verify() const { + return isSubrange() && DbgNode->getNumOperands() == 3; +} + +/// \brief Verify that the lexical block descriptor is well formed. +bool DILexicalBlock::Verify() const { + return isLexicalBlock() && DbgNode->getNumOperands() == 6; +} + +/// \brief Verify that the file-scoped lexical block descriptor is well formed. 
+bool DILexicalBlockFile::Verify() const { + return isLexicalBlockFile() && DbgNode->getNumOperands() == 3; +} + +/// \brief Verify that the template type parameter descriptor is well formed. +bool DITemplateTypeParameter::Verify() const { + return isTemplateTypeParameter() && DbgNode->getNumOperands() == 7; +} + +/// \brief Verify that the template value parameter descriptor is well formed. +bool DITemplateValueParameter::Verify() const { + return isTemplateValueParameter() && DbgNode->getNumOperands() == 8; } /// getOriginalTypeSize - If this type is derived from a base type then @@ -553,7 +605,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { /// getObjCProperty - Return property node, if this ivar is associated with one. MDNode *DIDerivedType::getObjCProperty() const { - if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10) + if (DbgNode->getNumOperands() <= 10) return NULL; return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)); } @@ -619,9 +671,7 @@ StringRef DIScope::getFilename() const { return DINameSpace(DbgNode).getFilename(); if (isType()) return DIType(DbgNode).getFilename(); - if (isFile()) - return DIFile(DbgNode).getFilename(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 0); } StringRef DIScope::getDirectory() const { @@ -639,44 +689,42 @@ StringRef DIScope::getDirectory() const { return DINameSpace(DbgNode).getDirectory(); if (isType()) return DIType(DbgNode).getDirectory(); - if (isFile()) - return DIFile(DbgNode).getDirectory(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 1); } DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 13) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) return DIArray(N); return DIArray(); } @@ -805,71 +853,25 @@ void DebugInfoFinder::processModule(const Module &M) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); addCompileUnit(CU); - if (CU.getVersion() > LLVMDebugVersion10) { - DIArray GVs = CU.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { - DIGlobalVariable DIG(GVs.getElement(i)); - if (addGlobalVariable(DIG)) - processType(DIG.getType()); - } - DIArray SPs = CU.getSubprograms(); - for (unsigned i = 0, e = 
SPs.getNumElements(); i != e; ++i) - processSubprogram(DISubprogram(SPs.getElement(i))); - DIArray EnumTypes = CU.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - processType(DIType(EnumTypes.getElement(i))); - DIArray RetainedTypes = CU.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - processType(DIType(RetainedTypes.getElement(i))); - return; + DIArray GVs = CU.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { + DIGlobalVariable DIG(GVs.getElement(i)); + if (addGlobalVariable(DIG)) + processType(DIG.getType()); } + DIArray SPs = CU.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + processSubprogram(DISubprogram(SPs.getElement(i))); + DIArray EnumTypes = CU.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + processType(DIType(EnumTypes.getElement(i))); + DIArray RetainedTypes = CU.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + processType(DIType(RetainedTypes.getElement(i))); + // FIXME: We really shouldn't be bailing out after visiting just one CU + return; } } - - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - for (Function::const_iterator FI = (*I).begin(), FE = (*I).end(); - FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) { - if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) - processDeclare(DDI); - - DebugLoc Loc = BI->getDebugLoc(); - if (Loc.isUnknown()) - continue; - - LLVMContext &Ctx = BI->getContext(); - DIDescriptor Scope(Loc.getScope(Ctx)); - - if (Scope.isCompileUnit()) - addCompileUnit(DICompileUnit(Scope)); - else if (Scope.isSubprogram()) - processSubprogram(DISubprogram(Scope)); - else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile DBF = DILexicalBlockFile(Scope); - processLexicalBlock(DILexicalBlock(DBF.getScope())); - } - else if (Scope.isLexicalBlock()) - processLexicalBlock(DILexicalBlock(Scope)); - - if (MDNode *IA = Loc.getInlinedAt(Ctx)) - processLocation(DILocation(IA)); - } - - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); - if (addGlobalVariable(DIG)) { - if (DIG.getVersion() <= LLVMDebugVersion10) - addCompileUnit(DIG.getCompileUnit()); - processType(DIG.getType()); - } - } - } - - if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - processSubprogram(DISubprogram(NMD->getOperand(i))); } /// processLocation - Process DILocation. 
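With the old instruction-walking fallback deleted, processModule above relies entirely on the lists hanging off each compile unit in the module-level llvm.dbg.cu named metadata. The shape of that walk, sketched against the same C++ API the hunk itself uses (forEachCompileUnit is an illustrative name, not part of DebugInfoFinder):

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Visit every compile unit recorded in the module-level llvm.dbg.cu list.
void forEachCompileUnit(const Module &M) {
  if (NamedMDNode *CUNodes = M.getNamedMetadata("llvm.dbg.cu")) {
    for (unsigned i = 0, e = CUNodes->getNumOperands(); i != e; ++i) {
      MDNode *CU = CUNodes->getOperand(i);
      (void)CU; // wrap in DICompileUnit here and walk its GV/SP/type lists
    }
  }
}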
@@ -893,8 +895,6 @@ void DebugInfoFinder::processLocation(DILocation Loc) { void DebugInfoFinder::processType(DIType DT) { if (!addType(DT)) return; - if (DT.getVersion() <= LLVMDebugVersion10) - addCompileUnit(DT.getCompileUnit()); if (DT.isCompositeType()) { DICompositeType DCT(DT); processType(DCT.getTypeDerivedFrom()); @@ -929,8 +929,6 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) { void DebugInfoFinder::processSubprogram(DISubprogram SP) { if (!addSubprogram(SP)) return; - if (SP.getVersion() <= LLVMDebugVersion10) - addCompileUnit(SP.getCompileUnit()); processType(SP.getType()); } @@ -945,8 +943,6 @@ void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) { if (!NodesSeen.insert(DV)) return; - if (DIVariable(N).getVersion() <= LLVMDebugVersion10) - addCompileUnit(DIVariable(N).getCompileUnit()); processType(DIVariable(N).getType()); } diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 5c444d2..5559a6c 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Function.h" +#include "LLVMContextImpl.h" #include "SymbolTableListTraitsImpl.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -208,6 +209,10 @@ Function::~Function() { // Remove the function from the on-the-side GC table. clearGC(); + + // Remove the intrinsicID from the Cache. + if(getValueName() && isIntrinsic()) + getContext().pImpl->IntrinsicIDCache.erase(this); } void Function::BuildLazyArguments() const { @@ -337,18 +342,35 @@ void Function::copyAttributesFrom(const GlobalValue *Src) { /// intrinsic, or if the pointer is null. This value is always defined to be /// zero to allow easy checking for whether a function is intrinsic or not. The /// particular intrinsic functions which correspond to this value are defined in -/// llvm/Intrinsics.h. +/// llvm/Intrinsics.h. Results are cached in the LLVM context, subsequent +/// requests for the same ID return results much faster from the cache. /// unsigned Function::getIntrinsicID() const { const ValueName *ValName = this->getValueName(); if (!ValName || !isIntrinsic()) return 0; + + LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache = + getContext().pImpl->IntrinsicIDCache; + if(!IntrinsicIDCache.count(this)) { + unsigned Id = lookupIntrinsicID(); + IntrinsicIDCache[this]=Id; + return Id; + } + return IntrinsicIDCache[this]; +} + +/// This private method does the actual lookup of an intrinsic ID when the query +/// could not be answered from the cache. 
+unsigned Function::lookupIntrinsicID() const { + const ValueName *ValName = this->getValueName(); unsigned Len = ValName->getKeyLength(); const char *Name = ValName->getKeyData(); #define GET_FUNCTION_RECOGNIZER #include "llvm/IR/Intrinsics.gen" #undef GET_FUNCTION_RECOGNIZER + return 0; } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 8a0a465..2e3a525 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -331,12 +331,9 @@ CallInst::CallInst(const CallInst &CI) SubclassOptionalData = CI.SubclassOptionalData; } -void CallInst::addAttribute(unsigned i, Attribute attr) { +void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); - AttrBuilder B(attr); - LLVMContext &Context = getContext(); - PAL = PAL.addAttributes(Context, i, - AttributeSet::get(Context, i, B)); + PAL = PAL.addAttribute(getContext(), i, attr); setAttributes(PAL); } @@ -593,11 +590,9 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { return false; } -void InvokeInst::addAttribute(unsigned i, Attribute attr) { +void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); - AttrBuilder B(attr); - PAL = PAL.addAttributes(getContext(), i, - AttributeSet::get(getContext(), i, B)); + PAL = PAL.addAttribute(getContext(), i, attr); setAttributes(PAL); } diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index b73cd03..883bb98 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -58,6 +58,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { unsigned TBAAStructID = getMDKindID("tbaa.struct"); assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted"); (void)TBAAStructID; + + // Create the 'invariant.load' metadata kind. + unsigned InvariantLdId = getMDKindID("invariant.load"); + assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted"); + (void)InvariantLdId; } LLVMContext::~LLVMContext() { delete pImpl; } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 7353dc0..0c659b8 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -318,7 +318,7 @@ public: /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know - // whether or not a value has an entry in this map. + /// whether or not a value has an entry in this map. typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy; ValueHandlesTy ValueHandles; @@ -350,6 +350,11 @@ public: /// to date. std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords; + /// IntrinsicIDCache - Cache of intrinsic name (string) to numeric ID mappings + /// requested in this context + typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy; + IntrinsicIDCacheTy IntrinsicIDCache; + int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index d751064..0228aeb 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -303,6 +303,7 @@ void MDNode::deleteTemporary(MDNode *N) { /// getOperand - Return specified operand. 
Value *MDNode::getOperand(unsigned i) const { + assert(i < getNumOperands() && "Invalid operand number"); return *getOperandPtr(const_cast<MDNode*>(this), i); } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 5bdce2b..adc702e 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -195,6 +195,9 @@ void Value::setName(const Twine &NewName) { if (getSymTab(this, ST)) return; // Cannot set a name on this value (e.g. constant). + if (Function *F = dyn_cast<Function>(this)) + getContext().pImpl->IntrinsicIDCache.erase(F); + if (!ST) { // No symbol table to update? Just do the change. if (NameRef.empty()) { // Free the name for this value. @@ -307,7 +310,7 @@ void Value::replaceAllUsesWith(Value *New) { // Notify all ValueHandles (if present) that this value is going away. if (HasValueHandle) ValueHandleBase::ValueIsRAUWd(this, New); - + while (!use_empty()) { Use &U = *UseList; // Must handle Constants specially, we cannot call replaceUsesOfWith on a @@ -318,10 +321,10 @@ void Value::replaceAllUsesWith(Value *New) { continue; } } - + U.set(New); } - + if (BasicBlock *BB = dyn_cast<BasicBlock>(this)) BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New)); } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index c358a0a..0acbcfa 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -1292,7 +1292,7 @@ bool ModuleLinker::run() { //===----------------------------------------------------------------------===// /// LinkModules - This function links two modules together, with the resulting -/// left module modified to be the composite of the two input modules. If an +/// Dest module modified to be the composite of the two input modules. If an /// error occurs, true is returned and ErrorMsg (if not null) is set to indicate /// the problem. Upon failure, the Dest module could be in a modified state, /// and shouldn't be relied on to be consistent. 
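The Function.cpp, LLVMContextImpl.h and Value.cpp hunks above add a per-context cache for getIntrinsicID() and erase entries whenever a function is destroyed or renamed. The memoize-and-invalidate pattern, reduced to a self-contained sketch (std::map and string keys stand in for the DenseMap keyed by const Function*; every name here is illustrative):

#include <map>
#include <string>

static std::map<std::string, unsigned> IDCache;

// Placeholder for the generated recognizer pulled in via Intrinsics.gen.
static unsigned slowLookup(const std::string &Name) {
  return static_cast<unsigned>(Name.size());
}

unsigned getCachedID(const std::string &Name) {
  std::map<std::string, unsigned>::iterator I = IDCache.find(Name);
  if (I != IDCache.end())
    return I->second;              // hit: skip the expensive lookup
  unsigned ID = slowLookup(Name);  // miss: compute once and remember it
  IDCache[Name] = ID;
  return ID;
}

// Counterpart of the erase calls added to ~Function() and Value::setName():
// anything that can change the answer must drop the stale entry first.
void invalidateCachedID(const std::string &Name) { IDCache.erase(Name); }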
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 0b97f27..3d99548 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===// +//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -----------------------===// // // The LLVM Compiler Infrastructure // @@ -155,7 +155,7 @@ class ELFObjectWriter : public MCObjectWriter { raw_ostream &_OS, bool IsLittleEndian) : MCObjectWriter(_OS, IsLittleEndian), TargetObjectWriter(MOTW), - NeedsGOT(false), NeedsSymtabShndx(false){ + NeedsGOT(false), NeedsSymtabShndx(false) { } virtual ~ELFObjectWriter(); @@ -978,7 +978,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) UndefinedSymbolData[i].SymbolData->setIndex(Index++); - if (NumRegularSections > ELF::SHN_LORESERVE) + if (Index >= ELF::SHN_LORESERVE) NeedsSymtabShndx = true; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7eb7202..35613b4 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -215,7 +215,7 @@ public: virtual void EmitFileDirective(StringRef Filename); virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename); + StringRef Filename, unsigned CUID = 0); virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, unsigned Discriminator, @@ -828,14 +828,14 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) { } bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename) { + StringRef Filename, unsigned CUID) { if (!UseDwarfDirectory && !Directory.empty()) { if (sys::path::is_absolute(Filename)) - return EmitDwarfFileDirective(FileNo, "", Filename); + return EmitDwarfFileDirective(FileNo, "", Filename, CUID); SmallString<128> FullPathName = Directory; sys::path::append(FullPathName, Filename); - return EmitDwarfFileDirective(FileNo, "", FullPathName); + return EmitDwarfFileDirective(FileNo, "", FullPathName, CUID); } if (UseLoc) { @@ -846,8 +846,11 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, } PrintQuotedString(Filename, OS); EmitEOL(); + // All .file will belong to a single CUID. + CUID = 0; } - return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename); + return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename, + CUID); } void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 1a7df60..9adcc02 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -77,8 +77,8 @@ void MCContext::reset() { Symbols.clear(); Allocator.Reset(); Instances.clear(); - MCDwarfFiles.clear(); - MCDwarfDirs.clear(); + MCDwarfFilesCUMap.clear(); + MCDwarfDirsCUMap.clear(); MCGenDwarfLabelEntries.clear(); DwarfDebugFlags = StringRef(); MCLineSections.clear(); @@ -299,11 +299,13 @@ const MCSection *MCContext::getCOFFSection(StringRef Section, /// error and zero is returned and the client reports the error, else the /// allocated file number is returned. The file numbers may be in any order. unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, - unsigned FileNumber) { + unsigned FileNumber, unsigned CUID) { // TODO: a FileNumber of zero says to use the next available file number. 
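EmitDwarfFileDirective and MCContext::GetDwarfFile above now thread a compile-unit ID through to per-CU file and directory tables (MCDwarfFilesCUMap, MCDwarfDirsCUMap) instead of one global pair of vectors. A reduced model of that indirection, using plain standard containers and illustrative names:

#include <map>
#include <string>
#include <vector>

// One file table per compile unit, keyed by CUID, as in MCDwarfFilesCUMap.
static std::map<unsigned, std::vector<std::string> > FilesByCU;

// Assign (or reuse) the slot for FileNumber inside the table of one CU.
unsigned getDwarfFile(const std::string &FileName, unsigned FileNumber,
                      unsigned CUID) {
  std::vector<std::string> &Files = FilesByCU[CUID];
  if (FileNumber >= Files.size())
    Files.resize(FileNumber + 1); // make room; slot 0 stays unused
  Files[FileNumber] = FileName;
  return FileNumber;
}

bool isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
  const std::vector<std::string> &Files = FilesByCU[CUID];
  return FileNumber != 0 && FileNumber < Files.size() &&
         !Files[FileNumber].empty();
}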
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked // to not be less than one. This needs to be change to be not less than zero. + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; + SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID]; // Make space for this FileNumber in the MCDwarfFiles vector if needed. if (FileNumber >= MCDwarfFiles.size()) { MCDwarfFiles.resize(FileNumber + 1); @@ -363,7 +365,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it /// currently is assigned and false otherwise. -bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) { +bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size()) return false; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index d3fa906..4766b37 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, // Get the assembler info needed to setup the MCContext. const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); - assert(MAI && "Unable to create target asm info!"); + if (!MAI) + return 0; const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); - assert(MII && "Unable to create target instruction info!"); + if (!MII) + return 0; const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - assert(MRI && "Unable to create target register info!"); + if (!MRI) + return 0; // Package up features to be passed to target/subtarget std::string FeaturesStr; const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, FeaturesStr); - assert(STI && "Unable to create subtarget info!"); + if (!STI) + return 0; // Set up the MCContext for creating symbols and MCExpr's. MCContext *Ctx = new MCContext(*MAI, *MRI, 0); - assert(Ctx && "Unable to create MCContext!"); + if (!Ctx) + return 0; // Set up disassembler. MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); - assert(DisAsm && "Unable to create disassembler!"); + if (!DisAsm) + return 0; DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, *MAI, *MII, *MRI, *STI); - assert(IP && "Unable to create instruction printer!"); + if (!IP) + return 0; LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); - assert(DC && "Allocation failure!"); + if (!DC) + return 0; return DC; } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index fea057a..0f8f074 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -298,8 +298,8 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { // Put out the directory and file tables. // First the directory table. - const std::vector<StringRef> &MCDwarfDirs = - context.getMCDwarfDirs(); + const SmallVectorImpl<StringRef> &MCDwarfDirs = + context.getMCDwarfDirs(CUID); for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName MCOS->EmitBytes(StringRef("\0", 1)); // the null term. 
of the string @@ -307,8 +307,8 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { MCOS->EmitIntValue(0, 1); // Terminate the directory list // Second the file table. - const std::vector<MCDwarfFile *> &MCDwarfFiles = - MCOS->getContext().getMCDwarfFiles(); + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = + MCOS->getContext().getMCDwarfFiles(CUID); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName MCOS->EmitBytes(StringRef("\0", 1)); // the null term. of the string @@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. - const std::vector<StringRef> &MCDwarfDirs = + const SmallVectorImpl<StringRef> &MCDwarfDirs = context.getMCDwarfDirs(); if (MCDwarfDirs.size() > 0) { MCOS->EmitBytes(MCDwarfDirs[0]); MCOS->EmitBytes("/"); } - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); MCOS->EmitBytes(MCDwarfFiles[1]->getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index c1428d8..7f5f1b6 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===// +//===- lib/MC/MCELFStreamer.cpp - ELF Object Output -----------------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 89f74c1..c872b22 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -89,7 +89,7 @@ namespace { virtual void EmitFileDirective(StringRef Filename) {} virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename) { + StringRef Filename, unsigned CUID = 0) { return false; } virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 2e1a045..bafa002 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -223,6 +223,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { } void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { + // FIXME: Check this. Mips64el is using the base values, which is most likely + // incorrect. + if (T.getArch() != Triple::mips64el) + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (T.getArch() == Triple::x86) { PersonalityEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 @@ -230,15 +235,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { LSDAEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; - FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_) + FDEEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; TTypeEncoding = (RelocM == Reloc::PIC_) ? 
dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; } else if (T.getArch() == Triple::x86_64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) @@ -261,8 +264,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } } else if (T.getArch() == Triple::aarch64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. @@ -282,7 +283,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { } else if (T.getArch() == Triple::ppc64) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 6ab49ec..9d52377 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return TokError("unmatched .ifs or .elses"); // Check to see there are no empty DwarfFile slots. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { if (!MCDwarfFiles[i]) @@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // If we previously parsed a cpp hash file line comment then make sure the // current Dwarf File is for the CppHashFilename if not then emit the // Dwarf File table for it and adjust the line number for the .loc. 
- const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); if (CppHashFilename.size() != 0) { if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp index 573308a..0e04c55 100644 --- a/lib/MC/MCPureStreamer.cpp +++ b/lib/MC/MCPureStreamer.cpp @@ -95,7 +95,7 @@ public: report_fatal_error("unsupported directive in pure streamer"); } virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename) { + StringRef Filename, unsigned CUID = 0) { report_fatal_error("unsupported directive in pure streamer"); } diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 9857f7b..d02e553 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -24,7 +24,7 @@ using namespace llvm; MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.push_back(std::make_pair(section, section)); } @@ -40,7 +40,7 @@ void MCStreamer::reset() { EmitDebugFrame = false; CurrentW64UnwindInfo = 0; LastSymbol = 0; - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.clear(); SectionStack.push_back(std::make_pair(section, section)); } @@ -157,8 +157,8 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename) { - return getContext().GetDwarfFile(Directory, Filename, FileNo) == 0; + StringRef Filename, unsigned CUID) { + return getContext().GetDwarfFile(Directory, Filename, FileNo, CUID) == 0; } void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, @@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() { if (FrameInfos.empty()) - return NULL; + return 0; return &FrameInfos.back(); } @@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); - MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset); + MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -623,5 +623,5 @@ void MCStreamer::Finish() { MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) { report_fatal_error("Not supported!"); - return *(static_cast<MCSymbolData*> (NULL)); + return *(static_cast<MCSymbolData*>(0)); } diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index eb1690e..6501df9 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1304,14 +1304,17 @@ StringRef MachOObjectFile::getFileFormatName() const { } } + // Make sure the cpu type has the correct mask. 
+ assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) + == llvm::MachO::CPUArchABI64 && + "32-bit object file when we're 64-bit?"); + switch (MachOObj->getHeader().CPUType) { case llvm::MachO::CPUTypeX86_64: return "Mach-O 64-bit x86-64"; case llvm::MachO::CPUTypePowerPC64: return "Mach-O 64-bit ppc64"; default: - assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 && - "32-bit object file when we're 64-bit?"); return "Mach-O 64-bit unknown"; } } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 53fcf06..560d7eb 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1222,14 +1222,10 @@ sortOpts(StringMap<Option*> &OptMap, namespace { class HelpPrinter { - size_t MaxArgLen; - const Option *EmptyArg; const bool ShowHidden; public: - explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) { - EmptyArg = 0; - } + explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) {} void operator=(bool Value) { if (Value == false) return; @@ -1266,7 +1262,7 @@ public: outs() << "\n\n"; // Compute the maximum argument length... - MaxArgLen = 0; + size_t MaxArgLen = 0; for (size_t i = 0, e = Opts.size(); i != e; ++i) MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index cd430f2..1ee69b6 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (EC) return EC; - OwningPtr<mapped_file_region> MappedFile( - new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC)); + OwningPtr<mapped_file_region> MappedFile(new mapped_file_region( + FD, true, mapped_file_region::readwrite, Size, 0, EC)); if (EC) return EC; diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 691b6f5..8042237 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -72,13 +72,15 @@ static void CopyStringRef(char *Memory, StringRef Data) { Memory[Data.size()] = 0; // Null terminate string. } -/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. -template <typename T> -static T *GetNamedBuffer(StringRef Buffer, StringRef Name, - bool RequiresNullTerminator) { - char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); - CopyStringRef(Mem + sizeof(T), Name); - return new (Mem) T(Buffer, RequiresNullTerminator); +struct NamedBufferAlloc { + StringRef Name; + NamedBufferAlloc(StringRef Name) : Name(Name) {} +}; + +void *operator new(size_t N, const NamedBufferAlloc &Alloc) { + char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1)); + CopyStringRef(Mem + N, Alloc.Name); + return Mem; } namespace { @@ -105,8 +107,8 @@ public: MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, bool RequiresNullTerminator) { - return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName, - RequiresNullTerminator); + return new (NamedBufferAlloc(BufferName)) + MemoryBufferMem(InputData, RequiresNullTerminator); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, @@ -183,24 +185,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename, //===----------------------------------------------------------------------===// namespace { -/// MemoryBufferMMapFile - This represents a file that was mapped in with the -/// sys::Path::MapInFilePages method. 
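The assert rewritten above tests the ABI64 bit the way a high-bit flag has to be tested: mask first, then compare against the mask itself. The old comparison of the masked value against 1 could never be true for a mask other than the lowest bit. A tiny self-contained illustration (the constant below is made up for the example, not the MachO definition):

#include <cassert>

int main() {
  const unsigned ABI64Mask = 0x01000000; // illustrative flag value
  unsigned CPUType = 0x01000007;         // some 64-bit CPU type

  // Correct: keep only the flag bit and compare against the flag itself.
  assert((CPUType & ABI64Mask) == ABI64Mask);

  // The old pattern, (CPUType & ABI64Mask) == 1, is false for any mask
  // other than 0x1, so the assert silently never fired as intended.
  return 0;
}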
When destroyed, it calls the -/// sys::Path::UnMapFilePages method. -class MemoryBufferMMapFile : public MemoryBufferMem { -public: - MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator) - : MemoryBufferMem(Buffer, RequiresNullTerminator) { } +/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region. +/// +/// This handles converting the offset into a legal offset on the platform. +class MemoryBufferMMapFile : public MemoryBuffer { + sys::fs::mapped_file_region MFR; + + static uint64_t getLegalMapOffset(uint64_t Offset) { + return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); + } - ~MemoryBufferMMapFile() { - static int PageSize = sys::process::get_self()->page_size(); + static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { + return Len + (Offset - getLegalMapOffset(Offset)); + } - uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart()); - size_t Size = getBufferSize(); - uintptr_t RealStart = Start & ~(PageSize - 1); - size_t RealSize = Size + (Start - RealStart); + const char *getStart(uint64_t Len, uint64_t Offset) { + return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); + } - sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart), - RealSize); +public: + MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len, + uint64_t Offset, error_code EC) + : MFR(FD, false, sys::fs::mapped_file_region::readonly, + getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) { + if (!EC) { + const char *Start = getStart(Len, Offset); + init(Start, Start + Len, RequiresNullTerminator); + } + } + + virtual const char *getBufferIdentifier() const LLVM_OVERRIDE { + // The name is stored after the class itself. + return reinterpret_cast<const char *>(this + 1); } virtual BufferKind getBufferKind() const LLVM_OVERRIDE { @@ -244,6 +260,8 @@ error_code MemoryBuffer::getFile(const char *Filename, OwningPtr<MemoryBuffer> &result, int64_t FileSize, bool RequiresNullTerminator) { + // FIXME: Review if this check is unnecessary on windows as well. +#ifdef LLVM_ON_WIN32 // First check that the "file" is not a directory bool is_dir = false; error_code err = sys::fs::is_directory(Filename, is_dir); @@ -251,6 +269,7 @@ error_code MemoryBuffer::getFile(const char *Filename, return err; if (is_dir) return make_error_code(errc::is_a_directory); +#endif int OpenFlags = O_RDONLY; #ifdef O_BINARY @@ -341,17 +360,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize)) { - off_t RealMapOffset = Offset & ~(PageSize - 1); - off_t Delta = Offset - RealMapOffset; - size_t RealMapSize = MapSize + Delta; - - if (const char *Pages = sys::Path::MapInFilePages(FD, - RealMapSize, - RealMapOffset)) { - result.reset(GetNamedBuffer<MemoryBufferMMapFile>( - StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); + error_code EC; + result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( + RequiresNullTerminator, FD, MapSize, Offset, EC)); + if (!EC) return error_code::success(); - } } MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 3a65221..9c28176 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -40,7 +40,9 @@ namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } /// what they did. 
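The MemoryBufferMMapFile rewrite above cannot hand an arbitrary offset straight to the mapping call, so it rounds the offset down to the mapping alignment, maps a correspondingly longer region, and then points the buffer at the original byte. The arithmetic in isolation (the 4096 alignment is assumed purely for the example; the real code asks mapped_file_region::alignment()):

#include <cassert>
#include <cstdint>

static const uint64_t Alignment = 4096; // stand-in for the platform alignment

uint64_t legalMapOffset(uint64_t Offset) {
  return Offset & ~(Alignment - 1);               // round down to a boundary
}

uint64_t legalMapSize(uint64_t Len, uint64_t Offset) {
  return Len + (Offset - legalMapOffset(Offset)); // cover the skipped prefix
}

int main() {
  uint64_t Offset = 10000, Len = 500;
  assert(legalMapOffset(Offset) == 8192);
  assert(legalMapSize(Len, Offset) == 500 + (10000 - 8192));
  // The buffer start is then MappedBase + (Offset - legalMapOffset(Offset)).
  return 0;
}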
/// static cl::opt<bool> -Enabled("stats", cl::desc("Enable statistics output from program")); +Enabled( + "stats", + cl::desc("Enable statistics output from program (available with Asserts)")); namespace { @@ -142,6 +144,7 @@ void llvm::PrintStatistics(raw_ostream &OS) { } void llvm::PrintStatistics() { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) StatisticInfo &Stats = *StatInfo; // Statistics not enabled? @@ -151,4 +154,17 @@ void llvm::PrintStatistics() { raw_ostream &OutStream = *CreateInfoOutputFile(); PrintStatistics(OutStream); delete &OutStream; // Close the file. +#else + // Check if the -stats option is set instead of checking + // !Stats.Stats.empty(). In release builds, Statistics operators + // do nothing, so stats are never Registered. + if (Enabled) { + // Get the stream to write to. + raw_ostream &OutStream = *CreateInfoOutputFile(); + OutStream << "Statistics are disabled. " + << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; + OutStream.flush(); + delete &OutStream; // Close the file. + } +#endif } diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index e00394e..e9b26bd 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr, __clear_cache(const_cast<char *>(Start), const_cast<char *>(End)); # elif defined(__mips__) const char *Start = static_cast<const char *>(Addr); +# if defined(ANDROID) + // The declaration of "cacheflush" in Android bionic: + // extern int cacheflush(long start, long end, long flags); + const char *End = Start + Len; + long LStart = reinterpret_cast<long>(const_cast<char *>(Start)); + long LEnd = reinterpret_cast<long>(const_cast<char *>(End)); + cacheflush(LStart, LEnd, BCACHE); +# else cacheflush(const_cast<char *>(Start), Len, BCACHE); +# endif # endif #endif // end apple diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index c343455..11c5b05 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -419,6 +419,10 @@ retry_random_path: RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15]; } + // Make sure we don't fall into an infinite loop by constantly trying + // to create the parent path. + bool TriedToCreateParent = false; + // Try to open + create the file. rety_open_create: int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode); @@ -429,7 +433,9 @@ rety_open_create: goto retry_random_path; // If path prefix doesn't exist, try to create it. if (SavedErrno == errc::no_such_file_or_directory && - !exists(path::parent_path(RandomPath))) { + !exists(path::parent_path(RandomPath)) && + !TriedToCreateParent) { + TriedToCreateParent = true; StringRef p(RandomPath); SmallString<64> dir_to_create; for (path::const_iterator i = path::begin(p), @@ -471,12 +477,14 @@ rety_open_create: return error_code::success(); } -error_code mapped_file_region::init(int fd, uint64_t offset) { - AutoFD FD(fd); +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { + AutoFD ScopedFD(FD); + if (!CloseFD) + ScopedFD.take(); // Figure out how large the file is. struct stat FileInfo; - if (fstat(fd, &FileInfo) == -1) + if (fstat(FD, &FileInfo) == -1) return error_code(errno, system_category()); uint64_t FileSize = FileInfo.st_size; @@ -484,7 +492,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { Size = FileSize; else if (FileSize < Size) { // We need to grow the file. 
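mapped_file_region::init above now takes a CloseFD flag, so a mapping can either adopt the descriptor and close it or leave it open for the caller. A cut-down POSIX sketch of that ownership rule (ScopedMapping is an invented name, and the real class also handles write modes, truncation and the Windows handle paths):

#include <cstddef>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

struct ScopedMapping {
  void *Addr;
  std::size_t Len;

  ScopedMapping(int FD, bool CloseFD, std::size_t LenIn, off_t Offset)
      : Addr(0), Len(LenIn) {
    Addr = ::mmap(0, Len, PROT_READ, MAP_PRIVATE, FD, Offset);
    if (Addr == MAP_FAILED)
      Addr = 0;
    if (CloseFD)
      ::close(FD); // the mapping stays valid after the descriptor is closed
  }

  ~ScopedMapping() {
    if (Addr)
      ::munmap(Addr, Len);
  }
};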
- if (ftruncate(fd, Size) == -1) + if (ftruncate(FD, Size) == -1) return error_code(errno, system_category()); } @@ -493,7 +501,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { #ifdef MAP_FILE flags |= MAP_FILE; #endif - Mapping = ::mmap(0, Size, prot, flags, fd, offset); + Mapping = ::mmap(0, Size, prot, flags, FD, Offset); if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); @@ -522,12 +530,13 @@ mapped_file_region::mapped_file_region(const Twine &path, return; } - ec = init(ofd, offset); + ec = init(ofd, true, offset); if (ec) Mapping = 0; } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -541,7 +550,7 @@ mapped_file_region::mapped_file_region(int fd, return; } - ec = init(fd, offset); + ec = init(fd, closefd, offset); if (ec) Mapping = 0; } diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 2e6cc96..23f3d14 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -593,6 +593,10 @@ retry_random_path: random_path_utf16.push_back(0); random_path_utf16.pop_back(); + // Make sure we don't fall into an infinite loop by constantly trying + // to create the parent path. + bool TriedToCreateParent = false; + // Try to create + open the path. retry_create_file: HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(), @@ -610,7 +614,9 @@ retry_create_file: if (ec == windows_error::file_exists) goto retry_random_path; // Check for non-existing parent directories. - if (ec == windows_error::path_not_found) { + if (ec == windows_error::path_not_found && !TriedToCreateParent) { + TriedToCreateParent = true; + // Create the directories using result_path as temp storage. if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), random_path_utf16.size(), result_path)) @@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } -error_code mapped_file_region::init(int FD, uint64_t Offset) { +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { FileDescriptor = FD; // Make sure that the requested size fits within SIZE_T. 
if (Size > std::numeric_limits<SIZE_T>::max()) { - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return make_error_code(errc::invalid_argument); } @@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { 0); if (FileMappingHandle == NULL) { error_code ec = windows_error(GetLastError()); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { if (Mapping == NULL) { error_code ec = windows_error(GetLastError()); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { error_code ec = windows_error(GetLastError()); ::UnmapViewOfFile(Mapping); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } Size = mbi.RegionSize; } + + // Close all the handles except for the view. It will keep the other handles + // alive. + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); // Also closes FileHandle. + } else + ::CloseHandle(FileHandle); return error_code::success(); } @@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } FileDescriptor = 0; - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, true, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd, , FileMappingHandle() { FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); if (FileHandle == INVALID_HANDLE_VALUE) { - _close(FileDescriptor); + if (closefd) + _close(FileDescriptor); FileDescriptor = 0; ec = make_error_code(errc::bad_file_descriptor); return; } - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, closefd, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd, mapped_file_region::~mapped_file_region() { if (Mapping) ::UnmapViewOfFile(Mapping); - if (FileMappingHandle) - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else if (FileHandle != INVALID_HANDLE_VALUE) - ::CloseHandle(FileHandle); } #if LLVM_HAS_RVALUE_REFERENCES diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f71abd3..da26a37 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { size_t BytesToWrite = Size - (Size % NumBytes); write_impl(Ptr, BytesToWrite); - copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + size_t BytesRemaining = Size - BytesToWrite; + if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) { + // Too much left over to copy into our buffer. 
+ return write(Ptr + BytesToWrite, BytesRemaining); + } + copy_to_buffer(Ptr + BytesToWrite, BytesRemaining); return *this; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index cca6d12..572617c 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, return TopOfFrameOffset - FrameRegPos; } -/// Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - // FIXME: Make generic? Really consider after upstreaming. This code is now - // shared between PEI, ARM *and* here. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-save register for this purpose or allocate an extra spill slot. 
bool BigStack = - (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index cea7f91..e9f4497 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -200,6 +200,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); // Virtually no operation on f128 is legal, but LLVM can't expand them when // there's a valid register class, so we need custom operations in most cases. @@ -217,6 +219,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::FREM, MVT::f128, Expand); setOperationAction(ISD::FRINT, MVT::f128, Expand); setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FSUB, MVT::f128, Custom); setOperationAction(ISD::FTRUNC, MVT::f128, Expand); @@ -341,8 +344,7 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // ldxr dest, ptr // <binop> scratch, dest, incr // stxr stxr_status, scratch, ptr - // cmp stxr_status, #0 - // b.ne loopMBB + // cbnz stxr_status, loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); @@ -364,10 +366,8 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) - .addReg(stxr_status).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); @@ -437,8 +437,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, // cmp incr, dest (, sign extend if necessary) // csel scratch, dest, incr, cond // stxr stxr_status, scratch, ptr - // cmp stxr_status, #0 - // b.ne loopMBB + // cbnz stxr_status, loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); @@ -457,10 +456,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, BuildMI(BB, dl, TII->get(strOpc), stxr_status) .addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) - .addReg(stxr_status).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); @@ -533,17 +530,14 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, // loop2MBB: // strex stxr_status, newval, [ptr] - // cmp stxr_status, #0 - // b.ne loop1MBB + // cbnz stxr_status, loop1MBB BB = loop2MBB; unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); BuildMI(BB, dl, TII->get(strOpc), 
stxr_status).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::SUBwwi_lsl0_cmp)) - .addReg(stxr_status).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(AArch64::CBNZw)) + .addReg(stxr_status).addMBB(loop1MBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); @@ -1861,11 +1855,10 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, const GlobalValue *GV = GN->getGlobal(); unsigned Alignment = GV->getAlignment(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - - if (GV->isWeakForLinker() && RelocM == Reloc::Static) { - // Weak symbols can't use ADRP/ADD pair since they should evaluate to - // zero when undefined. In PIC mode the GOT can take care of this, but in - // absolute mode we use a constant pool load. + if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) { + // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate + // to zero when they remain undefined. In PIC mode the GOT can take care of + // this, but in absolute mode we use a constant pool load. SDValue PoolAddr; PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, DAG.getTargetConstantPool(GV, PtrVT, 0, 0, @@ -1873,10 +1866,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, DAG.getTargetConstantPool(GV, PtrVT, 0, 0, AArch64II::MO_LO12), DAG.getConstant(8, MVT::i32)); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); + SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(), + /*isVolatile=*/ false, + /*isNonTemporal=*/ true, + /*isInvariant=*/ true, 8); + if (GN->getOffset() != 0) + return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, + DAG.getConstant(GN->getOffset(), PtrVT)); + + return GlobalAddr; } if (Alignment == 0) { diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 562a7f6..319ec97 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -159,7 +159,7 @@ let Defs = [XSP], Uses = [XSP] in { // Atomic operation pseudo-instructions //===----------------------------------------------------------------------===// -let usesCustomInserter = 1, Defs = [NZCV] in { +let usesCustomInserter = 1 in { multiclass AtomicSizes<string opname> { def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr), [(set GPR32:$dst, (!cast<SDNode>(opname # "_8") GPR64:$ptr, GPR32:$incr))]>; @@ -178,11 +178,14 @@ defm ATOMIC_LOAD_AND : AtomicSizes<"atomic_load_and">; defm ATOMIC_LOAD_OR : AtomicSizes<"atomic_load_or">; defm ATOMIC_LOAD_XOR : AtomicSizes<"atomic_load_xor">; defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">; -defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; -defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; -defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; -defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; defm ATOMIC_SWAP : AtomicSizes<"atomic_swap">; +let Defs = [NZCV] in { + // These operations need a CMP to calculate the correct value + defm ATOMIC_LOAD_MIN : AtomicSizes<"atomic_load_min">; + defm ATOMIC_LOAD_MAX : AtomicSizes<"atomic_load_max">; + defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">; + defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">; +} let usesCustomInserter = 1, Defs = [NZCV] in { def ATOMIC_CMP_SWAP_I8 @@ 
-1942,43 +1945,41 @@ def fpz32 : Operand<f32>, ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> { let ParserMatchClass = fpzero_asmoperand; let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; } def fpz64 : Operand<f64>, ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> { let ParserMatchClass = fpzero_asmoperand; let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; } -multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, string asmop2, - dag pattern> { +multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> { def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, - (outs), ins, !strconcat("fcmp\t$Rn, ", asmop2), - [pattern], NoItinerary> { + (outs), ins, "fcmp\t$Rn, $Rm", [pattern], + NoItinerary> { let Defs = [NZCV]; } def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0}, - (outs), ins, !strconcat("fcmpe\t$Rn, ", asmop2), - [], NoItinerary> { + (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary> { let Defs = [NZCV]; } } -defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), "$Rm", +defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm), (set NZCV, (A64cmp (f32 FPR32:$Rn), FPR32:$Rm))>; -defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), "$Rm", +defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm), (set NZCV, (A64cmp (f64 FPR64:$Rn), FPR64:$Rm))>; -// What would be Rm should be written as 0, but anything is valid for -// disassembly so we can't set the bits -let PostEncoderMethod = "fixFCMPImm" in { - defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Imm), "$Imm", - (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Imm))>; +// What would be Rm should be written as 0; note that even though it's called +// "$Rm" here to fit in with the InstrFormats, it's actually an immediate. 
+defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm), + (set NZCV, (A64cmp (f32 FPR32:$Rn), fpz32:$Rm))>; - defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Imm), "$Imm", - (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Imm))>; -} +defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm), + (set NZCV, (A64cmp (f64 FPR64:$Rn), fpz64:$Rm))>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index c1695da..69bb80a 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -160,44 +160,53 @@ private: SMLoc StartLoc, EndLoc; + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + union { - struct { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - } ImmWithLSL; - - struct { - A64CC::CondCodes Code; - } CondCode; - - struct { - double Val; - } FPImm; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned RegNum; - } Reg; - - struct { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - } ShiftExtend; - - struct { - const char *Data; - unsigned Length; - } SysReg; - - struct { - const char *Data; - unsigned Length; - } Tok; + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; }; AArch64Operand(KindTy K, SMLoc S, SMLoc E) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index eba7666..12c1b8f 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -106,6 +106,11 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, + unsigned RmBits, + uint64_t Address, + const void *Decoder); + template<int RegWidth> static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, @@ -381,6 +386,17 @@ static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, + unsigned RmBits, + uint64_t Address, + const void *Decoder) { + // Any bits are valid in the instruction (they're architecturally ignored), + // but a code generator should insert 0. 
+ Inst.addOperand(MCOperand::CreateImm(0)); + return MCDisassembler::Success; +} + + template<int RegWidth> static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 756e037..a5c591e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -106,8 +106,6 @@ public: void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; - unsigned fixFCMPImm(const MCInst &MI, unsigned EncodedValue) const; - template<int hasRs, int hasRt2> unsigned fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const; @@ -423,15 +421,6 @@ AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups); } -unsigned AArch64MCCodeEmitter::fixFCMPImm(const MCInst &MI, - unsigned EncodedValue) const { - // For FCMP[E] Rn, #0.0, the Rm field has a canonical representation - // with 0s, but is architecturally ignored - EncodedValue &= ~0x1f0000u; - - return EncodedValue; -} - template<int hasRs, int hasRt2> unsigned AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue) const { diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000..f0d4dbe --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// There are several instruction patterns which can be used to provide this +// capability which can provide higher performance than other, potentially more +// direct patterns, specifically around when one micro-op reads a D-register +// operand that has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers which +// are going to be used by a DPR (or QPR) consumers and creates the more +// optimized access pattern. 
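The header comment above describes the Cortex-A15 hazard this new pass targets. A small, self-contained illustration of the kind of source pattern that creates it (a scalar VFP result in an S register consumed as a lane of a NEON D/Q register), assuming nothing beyond standard NEON intrinsics:

#include <arm_neon.h>

// Hypothetical example: 's' is produced as a scalar (S-register) VFP result
// and then read as one lane of a Q register, i.e. an SPR write feeding a
// DPR/QPR read - the partial-register dependency the pass rewrites.
float32x4_t scale_by_sum(float32x4_t v, float a, float b) {
  float s = a + b;                  // scalar add: result lives in an S register
  float32x4_t w = vdupq_n_f32(0.0f);
  w = vsetq_lane_f32(s, w, 0);      // insert the S value into lane 0 of a Q register
  return vmulq_f32(v, w);           // NEON multiply reads the partially written register
}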
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers + // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure if don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if this is a use of a SPR register. 
+bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + + assert(MO->isReg() && "Non register operand found!"); + if (!MO) return ARM::ssub_0; + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineOperand *Op = MI->findRegisterDefOperand(Reg); + + if (!Op) + continue; + + MachineInstr *Def = Op->getParent(); + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead. + bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. 
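eraseInstrWithNoUses above is a small worklist sweep: starting from one known-dead instruction, a producer becomes dead once every user that still matters is itself already marked dead. A stripped-down, compilable sketch of that general scheme, using illustrative types rather than LLVM's MachineInstr/MachineRegisterInfo machinery:

#include <set>
#include <vector>

// Illustrative stand-in for an instruction with def-use links.
struct Instr { std::vector<Instr*> Producers, Users; };

// Mark Root dead, then keep marking producers whose every remaining user is
// already in the dead set (self references are ignored, as in the pass).
void markTransitivelyDead(Instr *Root, std::set<Instr*> &Dead) {
  std::vector<Instr*> Worklist{Root};
  Dead.insert(Root);
  while (!Worklist.empty()) {
    Instr *I = Worklist.back();
    Worklist.pop_back();
    for (Instr *Def : I->Producers) {
      if (Dead.count(Def))
        continue;
      bool AllUsesDead = true;
      for (Instr *U : Def->Users)
        if (U != Def && !Dead.count(U)) { AllUsesDead = false; break; }
      if (AllUsesDead) {
        Dead.insert(Def);
        Worklist.push_back(Def);
      }
    }
  }
}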
+unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = MI->getOperand(I).getReg(); + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + assert(0 && "Unhandled update pattern!"); + return 0; +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register. +bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. 
+MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Defs; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Defs.push_back(MO.getReg()); + } + return Defs; +} + +// Creates a DPR register from an SPR one by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Creates a SPR register from a DPR by copying the value in lane 0. +unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. 
+unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and the using VEXT instructions to recompose the result. 
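The comment above summarizes optimizeAllLanesPattern: duplicate each lane of the input with VDUP, then stitch the duplicated values back together with VEXT so every lane of the result is produced by a full-width NEON write. The same dataflow, expressed as a compilable intrinsics sketch for a fully defined D register (illustrative only; the pass itself builds MachineInstrs directly):

#include <arm_neon.h>

// d = {d0, d1}. Duplicate each lane, then VEXT #1 over {d0,d0} : {d1,d1}
// yields {d0, d1} again, now produced entirely by NEON D-register writes.
float32x2_t all_lanes_rewrite(float32x2_t d) {
  float32x2_t lane0 = vdup_lane_f32(d, 0);   // VDUPLN32d, lane 0
  float32x2_t lane1 = vdup_lane_f32(d, 1);   // VDUPLN32d, lane 1
  return vext_f32(lane0, lane1, 1);          // VEXTd32 #1 recomposes the value
}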
+unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + // * COPY: * VDUP all lanes and merge the results together + // using VEXTs. + // + // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + // lane, and the other lane(s) of the DPR/QPR register + // that we are inserting in are undefined, use the + // original DPR/QPR value. + // * Otherwise, fall back on the same stategy as COPY. + // + // * REG_SEQUENCE: * If all except one of the input operands are + // IMPLICIT_DEFs, insert the VDUP pattern for just the + // defined input operand + // * Otherwise, fall back on the same stategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Defs = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs). 
It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 5faf8c3..80e5f37 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c7798..13ec208 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) - .addImm(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed001ea..ed8b9cd 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. 
Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. That is where we keep floats diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0ca6450..3b12408 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? 
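The hunk above drops ARM's private estimateStackSize in favour of the generic MachineFrameInfo::estimateStackSize, and the code just below keeps using the estimate to decide whether a scavenging slot is worthwhile. The core arithmetic both versions rely on is the usual round-up-to-alignment step; a tiny self-contained example of that idiom:

#include <cassert>
#include <cstdint>

// Round an offset up to the next multiple of Align, as done for each frame
// object when estimating the stack size (e.g. offset 10, align 8 -> 16).
uint64_t roundUpToAlignment(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(roundUpToAlignment(10, 8) == 16);
  assert(roundUpToAlignment(16, 8) == 16);
  return 0;
}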
bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a83f052..2c51de2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3155,7 +3155,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); // Remap uses. - SDValue Glue = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); + SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); if (!SDValue(N, 0).use_empty()) { SDValue Result; if (isThumb) @@ -3163,9 +3163,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { else { SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue); + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); Result = SDValue(ResNode,0); - Glue = Result.getValue(1); } ReplaceUses(SDValue(N, 0), Result); } @@ -3176,13 +3175,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { else { SDValue SubRegIdx = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32); SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, MVT::i32, MVT::Glue, SDValue(Ld, 0), SubRegIdx, Glue); + dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); Result = SDValue(ResNode,0); - Glue = Result.getValue(1); } ReplaceUses(SDValue(N, 1), Result); } - ReplaceUses(SDValue(N, 2), Glue); + ReplaceUses(SDValue(N, 2), OutChain); return NULL; } @@ -3195,9 +3193,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { // Store exclusive double return a i32 value which is the return status // of the issued store. - std::vector<EVT> ResTys; - ResTys.push_back(MVT::i32); - ResTys.push_back(MVT::Other); + EVT ResTys[] = { MVT::i32, MVT::Other }; bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); // Place arguments in the right order. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ef96e56..514971f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -504,6 +504,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FRINT, MVT::v2f64, Expand); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + setOperationAction(ISD::FMA, MVT::v2f64, Expand); setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); @@ -521,6 +522,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); + // Mark v2f32 intrinsics. 
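The ISelLowering hunk above, and the v2f32 markings that follow below, rely on the usual meaning of Expand: with no NEON instruction for these vector float operations, the legalizer breaks them into per-lane scalar operations or libcalls. Roughly the code the backend ends up with for a v2f32 FSIN, written out by hand as a compilable sketch:

#include <arm_neon.h>
#include <cmath>

// Manually scalarized equivalent of an "expanded" v2f32 sine: one scalar sin
// call per lane, each result re-inserted into the vector.
float32x2_t sin_v2f32(float32x2_t v) {
  float32x2_t r = v;
  r = vset_lane_f32(std::sin(vget_lane_f32(v, 0)), r, 0);
  r = vset_lane_f32(std::sin(vget_lane_f32(v, 1)), r, 1);
  return r;
}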
+ setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); + setOperationAction(ISD::FSIN, MVT::v2f32, Expand); + setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FPOWI, MVT::v2f32, Expand); + setOperationAction(ISD::FPOW, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); + setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); + setOperationAction(ISD::FEXP, MVT::v2f32, Expand); + setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); + setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); + setOperationAction(ISD::FRINT, MVT::v2f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); + // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. @@ -554,6 +572,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + // NEON only has FMA instructions as of VFP4. + if (!Subtarget->hasVFP4()) { + setOperationAction(ISD::FMA, MVT::v2f32, Expand); + setOperationAction(ISD::FMA, MVT::v4f32, Expand); + } + setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -1581,7 +1605,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) + getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } @@ -2247,8 +2271,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (RelocM == Reloc::PIC_) { + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, @@ -2292,8 +2315,6 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); // FIXME: Enable this for static codegen when tool issues are fixed. Also // update ARMFastISel::ARMMaterializeGV. @@ -2321,6 +2342,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (RelocM == Reloc::Static) { CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); } else { + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 
0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = @@ -2401,7 +2423,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) @@ -2661,7 +2682,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); - + SmallVector<SDValue, 16> ArgValues; int lastInsIndex = -1; SDValue ArgValue; @@ -2776,7 +2797,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } else { int FI = MFI->CreateFixedObject(Flags.getByValSize(), VA.getLocMemOffset(), false); - InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); } } else { int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, @@ -3573,7 +3594,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element) /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi) -/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] +/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] /// [b0 b1 b2 b3 b4 b5 b6 b7] /// +[b1 b0 b3 b2 b5 b4 b7 b6] /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0, @@ -3594,7 +3615,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { /// bit-count for each 16-bit element from the operand. We need slightly /// different sequencing for v4i16 and v8i16 to stay within NEON's available /// 64/128-bit registers. -/// +/// /// Trace for v4i16: /// input = [v0 v1 v2 v3 ] (vi 16-bit element) /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) @@ -3625,7 +3646,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { /// input = [v0 v1 ] (vi: 32-bit elements) /// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) -/// vrev: N0 = [k1 k0 k3 k2 ] +/// vrev: N0 = [k1 k0 k3 k2 ] /// [k0 k1 k2 k3 ] /// N1 =+[k1 k0 k3 k2 ] /// [k0 k2 k1 k3 ] @@ -4403,7 +4424,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; - + // Is this value dominant? (takes up more than half of the lanes) if (++Count > (NumElts / 2)) { hasDominantValue = true; @@ -4431,8 +4452,11 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // If we are VDUPing a value that comes directly from a vector, that will // cause an unnecessary move to and from a GPR, where instead we could - // just use VDUPLANE. - if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // just use VDUPLANE. We can only do this if the lane being extracted + // is at a constant index, as the VDUP from lane instructions only have + // constant-index forms. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Value->getOperand(1))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. 
We will insert the element @@ -4447,12 +4471,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), Value, DAG.getConstant(index, MVT::i32)), DAG.getConstant(index, MVT::i32)); - } else { + } else N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, Value->getOperand(0), Value->getOperand(1)); - } - } - else + } else N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); if (!usesOnlyOneValue) { @@ -4484,7 +4506,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (usesOnlyOneValue) { SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); if (isConstant && Val.getNode()) - return DAG.getNode(ARMISD::VDUP, dl, VT, Val); + return DAG.getNode(ARMISD::VDUP, dl, VT, Val); } } @@ -6329,6 +6351,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline); unsigned MJTI = JTI->createJumpTableIndex(LPadList); unsigned UId = AFI->createJumpTableUId(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); // Create the MBBs for the dispatch code. @@ -6338,14 +6361,11 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); unsigned trap_opcode; - if (Subtarget->isThumb()) { + if (Subtarget->isThumb()) trap_opcode = ARM::tTRAP; - } else { - if (Subtarget->useNaClTrap()) - trap_opcode = ARM::TRAPNaCl; - else - trap_opcode = ARM::TRAP; - } + else + trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP; + BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); @@ -6492,11 +6512,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addImm(0) .addMemOperand(JTMMOLd)); - unsigned NewVReg6 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) - .addReg(ARM::CPSR, RegState::Define) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg3)); + unsigned NewVReg6 = NewVReg5; + if (RelocM == Reloc::PIC_) { + NewVReg6 = MRI->createVirtualRegister(TRC); + AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) + .addReg(ARM::CPSR, RegState::Define) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg3)); + } BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) .addReg(NewVReg6, RegState::Kill) @@ -6576,11 +6599,18 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { .addImm(0) .addMemOperand(JTMMOLd)); - BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) - .addReg(NewVReg5, RegState::Kill) - .addReg(NewVReg4) - .addJumpTableIndex(MJTI) - .addImm(UId); + if (RelocM == Reloc::PIC_) { + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) + .addReg(NewVReg5, RegState::Kill) + .addReg(NewVReg4) + .addJumpTableIndex(MJTI) + .addImm(UId); + } else { + BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) + .addReg(NewVReg5, RegState::Kill) + .addJumpTableIndex(MJTI) + .addImm(UId); + } } // Add the jump table entries as successors to the MBB. 
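Stepping back to the LowerBUILD_VECTOR change earlier in this file's hunks: the new isa<ConstantSDNode> guard exists because the VDUP-from-lane instructions encode the lane number as an immediate. A compilable intrinsics sketch of the two situations (constant versus runtime lane index):

#include <arm_neon.h>

// Constant lane: maps to a single VDUP (lane) instruction.
float32x4_t splat_lane1(float32x2_t v) {
  return vdupq_lane_f32(v, 1);
}

// Runtime lane: no immediate form exists, so the value goes through memory
// (or a GPR) before the splat.
float32x4_t splat_lane_n(float32x2_t v, int lane) {
  float tmp[2];
  vst1_f32(tmp, v);
  return vdupq_n_f32(tmp[lane & 1]);
}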
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7745218..3003760 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } @@ -174,7 +185,8 @@ bool ARMPassConfig::addPreSched2() { addPass(createARMLoadStoreOptimizationPass()); printAndVerify("After ARM load / store optimizer"); } - if (getARMSubtarget().hasNEON()) + if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) && + getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 01c04b4..140a8db 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -194,17 +211,63 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + // Operations that we legalize using load/stores to the stack. + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*4 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Vector float <-> i32 conversions. 
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { @@ -247,7 +310,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return NEONFloatConversionTbl[Idx].Cost; } - // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, @@ -303,7 +365,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } @@ -326,6 +387,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. 
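These conversion entries, and the NEONVectorSelectTbl just below, are consulted through the same idiom as NEONFltDblTbl earlier in this function: legalize the type(s) first, look up the (ISD opcode, MVT) key in a static array, and fall back to the generic cost on a miss (the float/double table additionally scales its hit by the legalization split count). A self-contained sketch of that lookup, with illustrative enums standing in for the LLVM types:

// Illustrative stand-ins for ISD opcodes and legalized value types.
enum OpKind { FP_ROUND, FP_EXTEND };
enum TyKind { V2F64, V2F32, V4F32 };

struct CostEntry { OpKind Op; TyKind Ty; unsigned Cost; };

static const CostEntry FltDblTbl[] = {
  { FP_ROUND,  V2F64, 2 },
  { FP_EXTEND, V2F32, 2 },
  { FP_EXTEND, V4F32, 4 },
};

// Scan for an exact (opcode, type) match; -1 means "use the generic cost",
// mirroring how a CostTableLookup miss is handled.
int lookupCost(OpKind Op, TyKind Ty) {
  for (const CostEntry &E : FltDblTbl)
    if (E.Op == Op && E.Ty == Ty)
      return static_cast<int>(E.Cost);
  return -1;
}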
+ static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6c678fd..c897efd 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. 
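The ARMOperand rewrite above (matching the AArch64Operand change earlier in this patch set) replaces anonymous structs inside the union with named struct types, so the alternatives can be declared once, referenced by name, and kept out of the union body itself. A minimal compilable sketch of the pattern, with MCExpr standing in as a forward-declared placeholder:

class MCExpr;   // stand-in forward declaration

class Operand {
  struct RegOp { unsigned RegNum; };
  struct ImmOp { const MCExpr *Val; };
  struct TokOp { const char *Data; unsigned Length; };

  // The union now names its member types instead of defining them inline.
  union {
    RegOp Reg;
    ImmOp Imm;
    TokOp Tok;
  };
};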
- struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. - const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 586834c..b832508 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 09e15af..7a59a7d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -655,15 +655,28 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, int32_t offset = MO.getImm(); uint32_t Val = 0x2000; + int SoImmVal; if (offset == INT32_MIN) { Val = 0x1000; - offset = 0; + SoImmVal = 0; } else if (offset < 0) { Val = 0x1000; offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + if(SoImmVal == -1) { + Val = 0x2000; + offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + } + } else { + SoImmVal = ARM_AM::getSOImmVal(offset); + if(SoImmVal == -1) { + Val = 0x1000; + offset *= -1; + SoImmVal = ARM_AM::getSOImmVal(offset); + } } - int SoImmVal = ARM_AM::getSOImmVal(offset); assert(SoImmVal != -1 && "Not a valid so_imm value!"); Val |= SoImmVal; diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index fac931a..0a8b1af 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -103,6 +103,16 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, 
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } + if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + ValVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } if (LocVT == MVT::i32 || LocVT == MVT::f32) { ofst = State.AllocateStack(4, 4); State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); @@ -1024,6 +1034,14 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); } +SDValue +HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD); +} + //===----------------------------------------------------------------------===// // TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -1297,6 +1315,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine // Custom legalize GlobalAddress nodes into CONST32. setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i8, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); // Truncate action? setOperationAction(ISD::TRUNCATE, MVT::i64, Expand); @@ -1342,7 +1361,6 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine } - setOperationAction(ISD::BR_CC, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); if (EmitJumpTables) { setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -1352,6 +1370,9 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine // Increase jump tables cutover to 5, was 4. setMinimumJumpTableEntries(5); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); @@ -1365,6 +1386,29 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); + + // In V4, we have double word add/sub with carry. The problem with + // modelling this instruction is that it produces 2 results - Rdd and Px. + // To model update of Px, we will have to use Defs[p0..p3] which will + // cause any predicate live range to spill. So, we pretend we dont't + // have these instructions. 
+ setOperationAction(ISD::ADDE, MVT::i8, Expand); + setOperationAction(ISD::ADDE, MVT::i16, Expand); + setOperationAction(ISD::ADDE, MVT::i32, Expand); + setOperationAction(ISD::ADDE, MVT::i64, Expand); + setOperationAction(ISD::SUBE, MVT::i8, Expand); + setOperationAction(ISD::SUBE, MVT::i16, Expand); + setOperationAction(ISD::SUBE, MVT::i32, Expand); + setOperationAction(ISD::SUBE, MVT::i64, Expand); + setOperationAction(ISD::ADDC, MVT::i8, Expand); + setOperationAction(ISD::ADDC, MVT::i16, Expand); + setOperationAction(ISD::ADDC, MVT::i32, Expand); + setOperationAction(ISD::ADDC, MVT::i64, Expand); + setOperationAction(ISD::SUBC, MVT::i8, Expand); + setOperationAction(ISD::SUBC, MVT::i16, Expand); + setOperationAction(ISD::SUBC, MVT::i32, Expand); + setOperationAction(ISD::SUBC, MVT::i64, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); setOperationAction(ISD::CTTZ , MVT::i32, Expand); @@ -1436,6 +1480,8 @@ HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return 0; case HexagonISD::CONST32: return "HexagonISD::CONST32"; + case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; + case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC"; case HexagonISD::CMPICC: return "HexagonISD::CMPICC"; case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC"; @@ -1484,6 +1530,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 65dab85..3279cc6 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -27,6 +27,7 @@ namespace llvm { CONST32, CONST32_GP, // For marking data present in GP. 
+ CONST32_Int_Real, FCONST32, SETCC, ADJDYNALLOC, @@ -106,6 +107,7 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index d30cdda..96a252e 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -557,218 +557,43 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { } bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - switch(MI->getOpcode()) { - default: return false; - // JMP_EQri - case Hexagon::JMP_EQriPt_nv_V4: - case Hexagon::JMP_EQriPnt_nv_V4: - case Hexagon::JMP_EQriNotPt_nv_V4: - case Hexagon::JMP_EQriNotPnt_nv_V4: - - // JMP_EQri - with -1 - case Hexagon::JMP_EQriPtneg_nv_V4: - case Hexagon::JMP_EQriPntneg_nv_V4: - case Hexagon::JMP_EQriNotPtneg_nv_V4: - case Hexagon::JMP_EQriNotPntneg_nv_V4: - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_nv_V4: - case Hexagon::JMP_EQrrPnt_nv_V4: - case Hexagon::JMP_EQrrNotPt_nv_V4: - case Hexagon::JMP_EQrrNotPnt_nv_V4: - - // JMP_GTri - case Hexagon::JMP_GTriPt_nv_V4: - case Hexagon::JMP_GTriPnt_nv_V4: - case Hexagon::JMP_GTriNotPt_nv_V4: - case Hexagon::JMP_GTriNotPnt_nv_V4: - - // JMP_GTri - with -1 - case Hexagon::JMP_GTriPtneg_nv_V4: - case Hexagon::JMP_GTriPntneg_nv_V4: - case Hexagon::JMP_GTriNotPtneg_nv_V4: - case Hexagon::JMP_GTriNotPntneg_nv_V4: - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_nv_V4: - case Hexagon::JMP_GTrrPnt_nv_V4: - case Hexagon::JMP_GTrrNotPt_nv_V4: - case Hexagon::JMP_GTrrNotPnt_nv_V4: - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_nv_V4: - case Hexagon::JMP_GTrrdnPnt_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_nv_V4: - case Hexagon::JMP_GTUriPnt_nv_V4: - case Hexagon::JMP_GTUriNotPt_nv_V4: - case Hexagon::JMP_GTUriNotPnt_nv_V4: - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_nv_V4: - case Hexagon::JMP_GTUrrPnt_nv_V4: - case Hexagon::JMP_GTUrrNotPt_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_nv_V4: + // Constant extenders are allowed only for V4 and above. + if (!Subtarget.hasV4TOps()) + return false; - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: + const MCInstrDesc &MID = MI->getDesc(); + const uint64_t F = MID.TSFlags; + if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) + return true; - // TFR_FI + // TODO: This is largely obsolete now. Will need to be removed + // in consecutive patches. + switch(MI->getOpcode()) { + // TFR_FI Remains a special case. case Hexagon::TFR_FI: return true; + default: + return false; } + return false; } +// This returns true in two cases: +// - The OP code itself indicates that this is an extended instruction. +// - One of MOs has been marked with HMOTF_ConstExtended flag. 
bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { - switch(MI->getOpcode()) { - default: return false; - // JMP_EQri - case Hexagon::JMP_EQriPt_ie_nv_V4: - case Hexagon::JMP_EQriPnt_ie_nv_V4: - case Hexagon::JMP_EQriNotPt_ie_nv_V4: - case Hexagon::JMP_EQriNotPnt_ie_nv_V4: - - // JMP_EQri - with -1 - case Hexagon::JMP_EQriPtneg_ie_nv_V4: - case Hexagon::JMP_EQriPntneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: - case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_ie_nv_V4: - case Hexagon::JMP_EQrrPnt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPt_ie_nv_V4: - case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: - - // JMP_GTri - case Hexagon::JMP_GTriPt_ie_nv_V4: - case Hexagon::JMP_GTriPnt_ie_nv_V4: - case Hexagon::JMP_GTriNotPt_ie_nv_V4: - case Hexagon::JMP_GTriNotPnt_ie_nv_V4: - - // JMP_GTri - with -1 - case Hexagon::JMP_GTriPtneg_ie_nv_V4: - case Hexagon::JMP_GTriPntneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: - case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_ie_nv_V4: - case Hexagon::JMP_GTrrPnt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_ie_nv_V4: - case Hexagon::JMP_GTUriPnt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPt_ie_nv_V4: - case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_ie_nv_V4: - case Hexagon::JMP_GTUrrPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: - - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: - case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: - - // V4 absolute set addressing. - case Hexagon::LDrid_abs_setimm_V4: - case Hexagon::LDriw_abs_setimm_V4: - case Hexagon::LDrih_abs_setimm_V4: - case Hexagon::LDrib_abs_setimm_V4: - case Hexagon::LDriuh_abs_setimm_V4: - case Hexagon::LDriub_abs_setimm_V4: - - case Hexagon::STrid_abs_setimm_V4: - case Hexagon::STrib_abs_setimm_V4: - case Hexagon::STrih_abs_setimm_V4: - case Hexagon::STriw_abs_setimm_V4: - - // V4 global address load. - case Hexagon::LDd_GP_cPt_V4 : - case Hexagon::LDd_GP_cNotPt_V4 : - case Hexagon::LDd_GP_cdnPt_V4 : - case Hexagon::LDd_GP_cdnNotPt_V4 : - case Hexagon::LDb_GP_cPt_V4 : - case Hexagon::LDb_GP_cNotPt_V4 : - case Hexagon::LDb_GP_cdnPt_V4 : - case Hexagon::LDb_GP_cdnNotPt_V4 : - case Hexagon::LDub_GP_cPt_V4 : - case Hexagon::LDub_GP_cNotPt_V4 : - case Hexagon::LDub_GP_cdnPt_V4 : - case Hexagon::LDub_GP_cdnNotPt_V4 : - case Hexagon::LDh_GP_cPt_V4 : - case Hexagon::LDh_GP_cNotPt_V4 : - case Hexagon::LDh_GP_cdnPt_V4 : - case Hexagon::LDh_GP_cdnNotPt_V4 : - case Hexagon::LDuh_GP_cPt_V4 : - case Hexagon::LDuh_GP_cNotPt_V4 : - case Hexagon::LDuh_GP_cdnPt_V4 : - case Hexagon::LDuh_GP_cdnNotPt_V4 : - case Hexagon::LDw_GP_cPt_V4 : - case Hexagon::LDw_GP_cNotPt_V4 : - case Hexagon::LDw_GP_cdnPt_V4 : - case Hexagon::LDw_GP_cdnNotPt_V4 : - - // V4 global address store. 
- case Hexagon::STd_GP_cPt_V4 : - case Hexagon::STd_GP_cNotPt_V4 : - case Hexagon::STd_GP_cdnPt_V4 : - case Hexagon::STd_GP_cdnNotPt_V4 : - case Hexagon::STb_GP_cPt_V4 : - case Hexagon::STb_GP_cNotPt_V4 : - case Hexagon::STb_GP_cdnPt_V4 : - case Hexagon::STb_GP_cdnNotPt_V4 : - case Hexagon::STh_GP_cPt_V4 : - case Hexagon::STh_GP_cNotPt_V4 : - case Hexagon::STh_GP_cdnPt_V4 : - case Hexagon::STh_GP_cdnNotPt_V4 : - case Hexagon::STw_GP_cPt_V4 : - case Hexagon::STw_GP_cNotPt_V4 : - case Hexagon::STw_GP_cdnPt_V4 : - case Hexagon::STw_GP_cdnNotPt_V4 : - - // V4 predicated global address new value store. - case Hexagon::STb_GP_cPt_nv_V4 : - case Hexagon::STb_GP_cNotPt_nv_V4 : - case Hexagon::STb_GP_cdnPt_nv_V4 : - case Hexagon::STb_GP_cdnNotPt_nv_V4 : - case Hexagon::STh_GP_cPt_nv_V4 : - case Hexagon::STh_GP_cNotPt_nv_V4 : - case Hexagon::STh_GP_cdnPt_nv_V4 : - case Hexagon::STh_GP_cdnNotPt_nv_V4 : - case Hexagon::STw_GP_cPt_nv_V4 : - case Hexagon::STw_GP_cNotPt_nv_V4 : - case Hexagon::STw_GP_cdnPt_nv_V4 : - case Hexagon::STw_GP_cdnNotPt_nv_V4 : - - // TFR_FI - case Hexagon::TFR_FI_immext_V4: - - // TFRI_F - case Hexagon::TFRI_f: - case Hexagon::TFRI_cPt_f: - case Hexagon::TFRI_cNotPt_f: - case Hexagon::CONST64_Float_Real: + // First check if this is permanently extended op code. + const uint64_t F = MI->getDesc().TSFlags; + if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) + return true; + // Use MO operand flags to determine if one of MI's operands + // has HMOTF_ConstExtended flag set. + for (MachineInstr::const_mop_iterator I = MI->operands_begin(), + E = MI->operands_end(); I != E; ++I) { + if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) return true; } + return false; } bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { @@ -877,258 +702,6 @@ bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { } } -unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const { - switch(MI->getOpcode()) { - default: llvm_unreachable("Unknown type of instruction."); - // JMP_EQri - case Hexagon::JMP_EQriPt_nv_V4: - return Hexagon::JMP_EQriPt_ie_nv_V4; - case Hexagon::JMP_EQriNotPt_nv_V4: - return Hexagon::JMP_EQriNotPt_ie_nv_V4; - case Hexagon::JMP_EQriPnt_nv_V4: - return Hexagon::JMP_EQriPnt_ie_nv_V4; - case Hexagon::JMP_EQriNotPnt_nv_V4: - return Hexagon::JMP_EQriNotPnt_ie_nv_V4; - - // JMP_EQri -- with -1 - case Hexagon::JMP_EQriPtneg_nv_V4: - return Hexagon::JMP_EQriPtneg_ie_nv_V4; - case Hexagon::JMP_EQriNotPtneg_nv_V4: - return Hexagon::JMP_EQriNotPtneg_ie_nv_V4; - case Hexagon::JMP_EQriPntneg_nv_V4: - return Hexagon::JMP_EQriPntneg_ie_nv_V4; - case Hexagon::JMP_EQriNotPntneg_nv_V4: - return Hexagon::JMP_EQriNotPntneg_ie_nv_V4; - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_nv_V4: - return Hexagon::JMP_EQrrPt_ie_nv_V4; - case Hexagon::JMP_EQrrNotPt_nv_V4: - return Hexagon::JMP_EQrrNotPt_ie_nv_V4; - case Hexagon::JMP_EQrrPnt_nv_V4: - return Hexagon::JMP_EQrrPnt_ie_nv_V4; - case Hexagon::JMP_EQrrNotPnt_nv_V4: - return Hexagon::JMP_EQrrNotPnt_ie_nv_V4; - - // JMP_GTri - case Hexagon::JMP_GTriPt_nv_V4: - return Hexagon::JMP_GTriPt_ie_nv_V4; - case Hexagon::JMP_GTriNotPt_nv_V4: - return Hexagon::JMP_GTriNotPt_ie_nv_V4; - case Hexagon::JMP_GTriPnt_nv_V4: - return Hexagon::JMP_GTriPnt_ie_nv_V4; - case Hexagon::JMP_GTriNotPnt_nv_V4: - return Hexagon::JMP_GTriNotPnt_ie_nv_V4; - - // JMP_GTri -- with -1 - case Hexagon::JMP_GTriPtneg_nv_V4: - return Hexagon::JMP_GTriPtneg_ie_nv_V4; - case Hexagon::JMP_GTriNotPtneg_nv_V4: - return 
Hexagon::JMP_GTriNotPtneg_ie_nv_V4; - case Hexagon::JMP_GTriPntneg_nv_V4: - return Hexagon::JMP_GTriPntneg_ie_nv_V4; - case Hexagon::JMP_GTriNotPntneg_nv_V4: - return Hexagon::JMP_GTriNotPntneg_ie_nv_V4; - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_nv_V4: - return Hexagon::JMP_GTrrPt_ie_nv_V4; - case Hexagon::JMP_GTrrNotPt_nv_V4: - return Hexagon::JMP_GTrrNotPt_ie_nv_V4; - case Hexagon::JMP_GTrrPnt_nv_V4: - return Hexagon::JMP_GTrrPnt_ie_nv_V4; - case Hexagon::JMP_GTrrNotPnt_nv_V4: - return Hexagon::JMP_GTrrNotPnt_ie_nv_V4; - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_nv_V4: - return Hexagon::JMP_GTrrdnPt_ie_nv_V4; - case Hexagon::JMP_GTrrdnNotPt_nv_V4: - return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4; - case Hexagon::JMP_GTrrdnPnt_nv_V4: - return Hexagon::JMP_GTrrdnPnt_ie_nv_V4; - case Hexagon::JMP_GTrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4; - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_nv_V4: - return Hexagon::JMP_GTUriPt_ie_nv_V4; - case Hexagon::JMP_GTUriNotPt_nv_V4: - return Hexagon::JMP_GTUriNotPt_ie_nv_V4; - case Hexagon::JMP_GTUriPnt_nv_V4: - return Hexagon::JMP_GTUriPnt_ie_nv_V4; - case Hexagon::JMP_GTUriNotPnt_nv_V4: - return Hexagon::JMP_GTUriNotPnt_ie_nv_V4; - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_nv_V4: - return Hexagon::JMP_GTUrrPt_ie_nv_V4; - case Hexagon::JMP_GTUrrNotPt_nv_V4: - return Hexagon::JMP_GTUrrNotPt_ie_nv_V4; - case Hexagon::JMP_GTUrrPnt_nv_V4: - return Hexagon::JMP_GTUrrPnt_ie_nv_V4; - case Hexagon::JMP_GTUrrNotPnt_nv_V4: - return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4; - - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_nv_V4: - return Hexagon::JMP_GTUrrdnPt_ie_nv_V4; - case Hexagon::JMP_GTUrrdnNotPt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4; - case Hexagon::JMP_GTUrrdnPnt_nv_V4: - return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4; - case Hexagon::JMP_GTUrrdnNotPnt_nv_V4: - return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4; - - case Hexagon::TFR_FI: - return Hexagon::TFR_FI_immext_V4; - - case Hexagon::MEMw_ADDi_indexed_MEM_V4 : - case Hexagon::MEMw_SUBi_indexed_MEM_V4 : - case Hexagon::MEMw_ADDr_indexed_MEM_V4 : - case Hexagon::MEMw_SUBr_indexed_MEM_V4 : - case Hexagon::MEMw_ANDr_indexed_MEM_V4 : - case Hexagon::MEMw_ORr_indexed_MEM_V4 : - case Hexagon::MEMw_ADDi_MEM_V4 : - case Hexagon::MEMw_SUBi_MEM_V4 : - case Hexagon::MEMw_ADDr_MEM_V4 : - case Hexagon::MEMw_SUBr_MEM_V4 : - case Hexagon::MEMw_ANDr_MEM_V4 : - case Hexagon::MEMw_ORr_MEM_V4 : - case Hexagon::MEMh_ADDi_indexed_MEM_V4 : - case Hexagon::MEMh_SUBi_indexed_MEM_V4 : - case Hexagon::MEMh_ADDr_indexed_MEM_V4 : - case Hexagon::MEMh_SUBr_indexed_MEM_V4 : - case Hexagon::MEMh_ANDr_indexed_MEM_V4 : - case Hexagon::MEMh_ORr_indexed_MEM_V4 : - case Hexagon::MEMh_ADDi_MEM_V4 : - case Hexagon::MEMh_SUBi_MEM_V4 : - case Hexagon::MEMh_ADDr_MEM_V4 : - case Hexagon::MEMh_SUBr_MEM_V4 : - case Hexagon::MEMh_ANDr_MEM_V4 : - case Hexagon::MEMh_ORr_MEM_V4 : - case Hexagon::MEMb_ADDi_indexed_MEM_V4 : - case Hexagon::MEMb_SUBi_indexed_MEM_V4 : - case Hexagon::MEMb_ADDr_indexed_MEM_V4 : - case Hexagon::MEMb_SUBr_indexed_MEM_V4 : - case Hexagon::MEMb_ANDr_indexed_MEM_V4 : - case Hexagon::MEMb_ORr_indexed_MEM_V4 : - case Hexagon::MEMb_ADDi_MEM_V4 : - case Hexagon::MEMb_SUBi_MEM_V4 : - case Hexagon::MEMb_ADDr_MEM_V4 : - case Hexagon::MEMb_SUBr_MEM_V4 : - case Hexagon::MEMb_ANDr_MEM_V4 : - case Hexagon::MEMb_ORr_MEM_V4 : - llvm_unreachable("Needs implementing."); - } -} - -unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const { - switch(MI->getOpcode()) { - default: llvm_unreachable("Unknown type of 
jump instruction."); - // JMP_EQri - case Hexagon::JMP_EQriPt_ie_nv_V4: - return Hexagon::JMP_EQriPt_nv_V4; - case Hexagon::JMP_EQriNotPt_ie_nv_V4: - return Hexagon::JMP_EQriNotPt_nv_V4; - case Hexagon::JMP_EQriPnt_ie_nv_V4: - return Hexagon::JMP_EQriPnt_nv_V4; - case Hexagon::JMP_EQriNotPnt_ie_nv_V4: - return Hexagon::JMP_EQriNotPnt_nv_V4; - - // JMP_EQri -- with -1 - case Hexagon::JMP_EQriPtneg_ie_nv_V4: - return Hexagon::JMP_EQriPtneg_nv_V4; - case Hexagon::JMP_EQriNotPtneg_ie_nv_V4: - return Hexagon::JMP_EQriNotPtneg_nv_V4; - case Hexagon::JMP_EQriPntneg_ie_nv_V4: - return Hexagon::JMP_EQriPntneg_nv_V4; - case Hexagon::JMP_EQriNotPntneg_ie_nv_V4: - return Hexagon::JMP_EQriNotPntneg_nv_V4; - - // JMP_EQrr - case Hexagon::JMP_EQrrPt_ie_nv_V4: - return Hexagon::JMP_EQrrPt_nv_V4; - case Hexagon::JMP_EQrrNotPt_ie_nv_V4: - return Hexagon::JMP_EQrrNotPt_nv_V4; - case Hexagon::JMP_EQrrPnt_ie_nv_V4: - return Hexagon::JMP_EQrrPnt_nv_V4; - case Hexagon::JMP_EQrrNotPnt_ie_nv_V4: - return Hexagon::JMP_EQrrNotPnt_nv_V4; - - // JMP_GTri - case Hexagon::JMP_GTriPt_ie_nv_V4: - return Hexagon::JMP_GTriPt_nv_V4; - case Hexagon::JMP_GTriNotPt_ie_nv_V4: - return Hexagon::JMP_GTriNotPt_nv_V4; - case Hexagon::JMP_GTriPnt_ie_nv_V4: - return Hexagon::JMP_GTriPnt_nv_V4; - case Hexagon::JMP_GTriNotPnt_ie_nv_V4: - return Hexagon::JMP_GTriNotPnt_nv_V4; - - // JMP_GTri -- with -1 - case Hexagon::JMP_GTriPtneg_ie_nv_V4: - return Hexagon::JMP_GTriPtneg_nv_V4; - case Hexagon::JMP_GTriNotPtneg_ie_nv_V4: - return Hexagon::JMP_GTriNotPtneg_nv_V4; - case Hexagon::JMP_GTriPntneg_ie_nv_V4: - return Hexagon::JMP_GTriPntneg_nv_V4; - case Hexagon::JMP_GTriNotPntneg_ie_nv_V4: - return Hexagon::JMP_GTriNotPntneg_nv_V4; - - // JMP_GTrr - case Hexagon::JMP_GTrrPt_ie_nv_V4: - return Hexagon::JMP_GTrrPt_nv_V4; - case Hexagon::JMP_GTrrNotPt_ie_nv_V4: - return Hexagon::JMP_GTrrNotPt_nv_V4; - case Hexagon::JMP_GTrrPnt_ie_nv_V4: - return Hexagon::JMP_GTrrPnt_nv_V4; - case Hexagon::JMP_GTrrNotPnt_ie_nv_V4: - return Hexagon::JMP_GTrrNotPnt_nv_V4; - - // JMP_GTrrdn - case Hexagon::JMP_GTrrdnPt_ie_nv_V4: - return Hexagon::JMP_GTrrdnPt_nv_V4; - case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4: - return Hexagon::JMP_GTrrdnNotPt_nv_V4; - case Hexagon::JMP_GTrrdnPnt_ie_nv_V4: - return Hexagon::JMP_GTrrdnPnt_nv_V4; - case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4: - return Hexagon::JMP_GTrrdnNotPnt_nv_V4; - - // JMP_GTUri - case Hexagon::JMP_GTUriPt_ie_nv_V4: - return Hexagon::JMP_GTUriPt_nv_V4; - case Hexagon::JMP_GTUriNotPt_ie_nv_V4: - return Hexagon::JMP_GTUriNotPt_nv_V4; - case Hexagon::JMP_GTUriPnt_ie_nv_V4: - return Hexagon::JMP_GTUriPnt_nv_V4; - case Hexagon::JMP_GTUriNotPnt_ie_nv_V4: - return Hexagon::JMP_GTUriNotPnt_nv_V4; - - // JMP_GTUrr - case Hexagon::JMP_GTUrrPt_ie_nv_V4: - return Hexagon::JMP_GTUrrPt_nv_V4; - case Hexagon::JMP_GTUrrNotPt_ie_nv_V4: - return Hexagon::JMP_GTUrrNotPt_nv_V4; - case Hexagon::JMP_GTUrrPnt_ie_nv_V4: - return Hexagon::JMP_GTUrrPnt_nv_V4; - case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4: - return Hexagon::JMP_GTUrrNotPnt_nv_V4; - - // JMP_GTUrrdn - case Hexagon::JMP_GTUrrdnPt_ie_nv_V4: - return Hexagon::JMP_GTUrrdnPt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4: - return Hexagon::JMP_GTUrrdnNotPt_nv_V4; - case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4: - return Hexagon::JMP_GTUrrdnPnt_nv_V4; - case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4: - return Hexagon::JMP_GTUrrdnNotPnt_nv_V4; - } -} - - bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; @@ -1326,6 +899,16 @@ bool 
HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { } } +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + if (isNewValueJump(MI)) + return true; + + if (isNewValueStore(MI)) + return true; + + return false; +} + bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; } @@ -2366,6 +1949,10 @@ isValidOffset(const int Opcode, const int Offset) const { // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is // inserted to calculate the final address. Due to this reason, the function // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. switch(Opcode) { @@ -2375,7 +1962,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: - assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -2385,14 +1971,12 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STrid: case Hexagon::STrid_indexed: case Hexagon::STrid_f: - assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -2419,7 +2003,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMw_SUBr_MEM_V4 : case Hexagon::MEMw_ANDr_MEM_V4 : case Hexagon::MEMw_ORr_MEM_V4 : - assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); return (0 <= Offset && Offset <= 255); case Hexagon::MEMh_ADDi_indexed_MEM_V4 : @@ -2434,7 +2017,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::MEMh_SUBr_MEM_V4 : case Hexagon::MEMh_ANDr_MEM_V4 : case Hexagon::MEMh_ORr_MEM_V4 : - assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); return (0 <= Offset && Offset <= 127); case Hexagon::MEMb_ADDi_indexed_MEM_V4 : @@ -2800,7 +2382,26 @@ isConditionalStore (const MachineInstr* MI) const { } } +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask); +} +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. 
+ MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} DFAPacketizer *HexagonInstrInfo:: CreateTargetScheduleState(const TargetMachine *TM, @@ -2827,3 +2428,155 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, return false; } + +bool HexagonInstrInfo::isConstExtended(MachineInstr *MI) const { + + // Constant extenders are allowed only for V4 and above. + if (!Subtarget.hasV4TOps()) + return false; + + const uint64_t F = MI->getDesc().TSFlags; + unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; + if (isExtended) // Instruction must be extended. + return true; + + unsigned isExtendable = (F >> HexagonII::ExtendablePos) + & HexagonII::ExtendableMask; + if (!isExtendable) + return false; + + short ExtOpNum = getCExtOpNum(MI); + const MachineOperand &MO = MI->getOperand(ExtOpNum); + // Use MO operand flags to determine if MO + // has the HMOTF_ConstExtended flag set. + if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + return true; + // If this is a Machine BB address we are talking about, and it is + // not marked as extended, say so. + if (MO.isMBB()) + return false; + + // We could be using an instruction with an extendable immediate and shoehorn + // a global address into it. If it is a global address it will be constant + // extended. We do this for COMBINE. + // We currently only handle isGlobal() because it is the only kind of + // object we are going to end up with here for now. + // In the future we probably should add isSymbol(), etc. + if (MO.isGlobal() || MO.isSymbol()) + return true; + + // If the extendable operand is not 'Immediate' type, the instruction should + // have 'isExtended' flag set. + assert(MO.isImm() && "Extendable operand must be Immediate type"); + + int MinValue = getMinValue(MI); + int MaxValue = getMaxValue(MI); + int ImmValue = MO.getImm(); + + return (ImmValue < MinValue || ImmValue > MaxValue); +} + +// Returns true if a particular operand is extendable for an instruction. +bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned short OperandNum) const { + // Constant extenders are allowed only for V4 and above. + if (!Subtarget.hasV4TOps()) + return false; + + const uint64_t F = MI->getDesc().TSFlags; + + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + +// Returns Operand Index for the constant extended instruction. +unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); +} + +// Returns the min value that doesn't need to be extended. +int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1 << (bits - 1); + else + return 0; +} + +// Returns the max value that doesn't need to be extended. 
+int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return ~(-1 << (bits - 1)); + else + return ~(-1 << bits); +} + +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. +bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const { + + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. + if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retreive non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; + return true; + } + return false; +} + +// Returns opcode of the non-extended equivalent instruction. +short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { + + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. + short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode()); + if (NonExtOpcode >= 0) + return NonExtOpcode; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retreive non-ext equivalent instruction. 
+ switch (getAddrMode(MI)) { + case HexagonII::Absolute : + return Hexagon::getBasedWithImmOffset(MI->getOpcode()); + case HexagonII::BaseImmOffset : + return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + default: + return -1; + } + } + return -1; +} diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 4e36dfb..d2f059a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -169,6 +169,7 @@ public: bool isConditionalALU32 (const MachineInstr* MI) const; bool isConditionalLoad (const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; + bool isNewValueInst(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; bool isExtendable(const MachineInstr* MI) const; @@ -177,9 +178,18 @@ public: bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueJump(const MachineInstr* MI) const; bool isNewValueJumpCandidate(const MachineInstr *MI) const; - unsigned getImmExtForm(const MachineInstr* MI) const; - unsigned getNormalBranchForm(const MachineInstr* MI) const; + + void immediateExtend(MachineInstr *MI) const; + bool isConstExtended(MachineInstr *MI) const; + unsigned getAddrMode(const MachineInstr* MI) const; + bool isOperandExtended(const MachineInstr *MI, + unsigned short OperandNum) const; + unsigned short getCExtOpNum(const MachineInstr *MI) const; + int getMinValue(const MachineInstr *MI) const; + int getMaxValue(const MachineInstr *MI) const; + bool NonExtEquivalentExists (const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; private: int getMatchingCondBranchOpcode(int Opc, bool sense) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 082772a..d7bab20 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -251,28 +251,55 @@ multiclass TFR_base<string CextOp> { } } +class T_TFR64_Pred<bit PredNot, bit isPredNew> + : ALU32_rr<(outs DoubleRegs:$dst), + (ins PredRegs:$src1, DoubleRegs:$src2), + !if(PredNot, "if (!$src1", "if ($src1")# + !if(isPredNew, ".new) ", ") ")#"$dst = $src2", []> +{ + bits<5> dst; + bits<2> src1; + bits<5> src2; + + let IClass = 0b1111; + let Inst{27-24} = 0b1101; + let Inst{13} = isPredNew; + let Inst{7} = PredNot; + let Inst{4-0} = dst; + let Inst{6-5} = src1; + let Inst{20-17} = src2{4-1}; + let Inst{16} = 0b1; + let Inst{12-9} = src2{4-1}; + let Inst{8} = 0b0; +} + multiclass TFR64_Pred<bit PredNot> { let PredSense = !if(PredNot, "false", "true") in { - def _c#NAME : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2", - []>; - // Predicate new + def _c#NAME : T_TFR64_Pred<PredNot, 0>; + let PNewValue = "new" in - def _cdn#NAME : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, DoubleRegs:$src2), - !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2", - []>; + def _cdn#NAME : T_TFR64_Pred<PredNot, 1>; // Predicate new } } -let InputType = "reg", neverHasSideEffects = 1 in -multiclass TFR64_base<string CextOp> { - let CextOpcode = CextOp, BaseOpcode = CextOp in { +let neverHasSideEffects = 1 in +multiclass TFR64_base<string BaseName> { + let BaseOpcode = BaseName in { let isPredicable = 1 in - def NAME : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1), - "$dst = $src1", - []>; + def NAME : ALU32Inst <(outs DoubleRegs:$dst), + 
(ins DoubleRegs:$src1), + "$dst = $src1" > { + bits<5> dst; + bits<5> src1; + + let IClass = 0b1111; + let Inst{27-23} = 0b01010; + let Inst{4-0} = dst; + let Inst{20-17} = src1{4-1}; + let Inst{16} = 0b1; + let Inst{12-9} = src1{4-1}; + let Inst{8} = 0b0; + } let isPredicated = 1 in { defm Pt : TFR64_Pred<0>; @@ -281,9 +308,8 @@ multiclass TFR64_base<string CextOp> { } } - multiclass TFRI_Pred<bit PredNot> { - let PredSense = !if(PredNot, "false", "true") in { + let isMoveImm = 1, PredSense = !if(PredNot, "false", "true") in { def _c#NAME : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Ext:$src2), !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2", @@ -301,8 +327,8 @@ multiclass TFRI_Pred<bit PredNot> { let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in multiclass TFRI_base<string CextOp> { let CextOpcode = CextOp, BaseOpcode = CextOp#I in { - let opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicable = 1, - isReMaterializable = 1 in + let isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, + isMoveImm = 1, isPredicable = 1, isReMaterializable = 1 in def NAME : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), "$dst = #$src1", [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>; @@ -317,7 +343,7 @@ multiclass TFRI_base<string CextOp> { defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel; defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel; -defm TFR64 : TFR64_base<"TFR64">, ImmRegRel, PredNewRel; +defm TFR64 : TFR64_base<"TFR64">, PredNewRel; // Transfer control register. let neverHasSideEffects = 1 in @@ -2050,6 +2076,10 @@ def CONST32_Int_Real : LDInst2<(outs IntRegs:$dst), (ins i32imm:$global), "$dst = CONST32(#$global)", [(set (i32 IntRegs:$dst), imm:$global) ]>; +// Map BlockAddress lowering to CONST32_Int_Real +def : Pat<(HexagonCONST32_GP tblockaddress:$addr), + (CONST32_Int_Real tblockaddress:$addr)>; + let isReMaterializable = 1, isMoveImm = 1 in def CONST32_Label : LDInst2<(outs IntRegs:$dst), (ins bblabel:$label), "$dst = CONST32($label)", @@ -2845,6 +2875,18 @@ def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), s11_0ExtPred:$offset)))>, Requires<[NoV4T]>; +// i1 -> i64 +def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), + (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 20 in +def: Pat <(i64 (zextloadi1 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[NoV4T]>; + // i16 -> i64 def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>, @@ -3020,6 +3062,11 @@ def BR_JT : JRInst<(outs), (ins IntRegs:$src), "jumpr $src", [(HexagonBR_JT (i32 IntRegs:$src))]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1 in +def BRIND : JRInst<(outs), (ins IntRegs:$src), + "jumpr $src", + [(brind (i32 IntRegs:$src))]>; + def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>; def : Pat<(HexagonWrapperJT tjumptable:$dst), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index e1b2f88..1d0643d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -940,6 +940,18 @@ def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1), s11_0ExtPred:$offset)))>, Requires<[HasV4T]>; +// zext i1->i64 +def: Pat <(i64 (zextloadi1 ADDRriS11_0:$src1)), + (i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>, + Requires<[HasV4T]>; + +let AddedComplexity = 20 in +def: Pat 
<(i64 (zextloadi1 (add (i32 IntRegs:$src1), + s11_0ExtPred:$offset))), + (i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1, + s11_0ExtPred:$offset)))>, + Requires<[HasV4T]>; + // zext i16->i64 def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), (i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>, @@ -3790,6 +3802,11 @@ def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>, Requires<[HasV4T]>; +// Transfer a block address into a register +def : Pat<(HexagonCONST32_GP tblockaddress:$src1), + (TFRI_V4 tblockaddress:$src1)>, + Requires<[HasV4T]>; + let AddedComplexity=50, neverHasSideEffects = 1, isPredicated = 1 in def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, globaladdress:$src2), diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index ced17b3..1388ad4 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -15,7 +15,8 @@ #define DEBUG_TYPE "misched" #include "HexagonMachineScheduler.h" -#include <queue> +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/IR/Function.h" using namespace llvm; @@ -159,6 +160,9 @@ void VLIWMachineScheduler::schedule() { SchedImpl->initialize(this); // To view Height/Depth correctly, they should be accessed at least once. + // + // FIXME: SUnit::dumpAll always recompute depth and height now. The max + // depth/height could be computed directly from the roots and leaves. DEBUG(unsigned maxH = 0; for (unsigned su = 0, e = SUnits.size(); su != e; ++su) if (SUnits[su].getHeight() > maxH) diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index aff6b86..866beb1 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -17,35 +17,36 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "packets" -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" -#include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/DFAPacketizer.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/MathExtras.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" 
+#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "HexagonMachineFunctionInfo.h" + #include <map> using namespace llvm; @@ -257,7 +258,7 @@ void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - assert(QII->isExtended(MI) && + assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && "Should only be called for constant extended instructions"); MachineFunction *MF = MI->getParent()->getParent(); MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT_i), @@ -350,17 +351,6 @@ static bool IsControlFlow(MachineInstr* MI) { return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); } -bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (QII->isNewValueJump(MI)) - return true; - - if (QII->isNewValueStore(MI)) - return true; - - return false; -} - // Function returns true if an instruction can be promoted to the new-value // store. It will always return false for v2 and v3. // It lists all the conditional and unconditional stores that can be promoted @@ -2165,7 +2155,8 @@ static bool GetPredicateSense(MachineInstr* MI, } bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { - if (isNewValueInst(MI)) + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + if (QII->isNewValueInst(MI)) return true; switch (MI->getOpcode()) { @@ -2893,13 +2884,13 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // dealloc_return and memop always take SLOT0. // Arch spec 3.4.4.2 if (QRI->Subtarget.hasV4TOps()) { - - if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) { + if (MCIDI.mayStore() && MCIDJ.mayStore() && + (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { Dependence = true; return false; } - if ( (QII->isMemOp(J) && MCIDI.mayStore()) + if ((QII->isMemOp(J) && MCIDI.mayStore()) || (MCIDJ.mayStore() && QII->isMemOp(I)) || (QII->isMemOp(J) && QII->isMemOp(I))) { Dependence = true; @@ -3196,7 +3187,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { MachineInstr *nvjMI = MII; assert(ResourceTracker->canReserveResources(MI)); ResourceTracker->reserveResources(MI); - if (QII->isExtended(MI) && + if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && !tryAllocateResourcesForConstExt(MI)) { endPacket(MBB, MI); ResourceTracker->reserveResources(MI); @@ -3216,7 +3207,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { && (!tryAllocateResourcesForConstExt(nvjMI) || !ResourceTracker->canReserveResources(nvjMI))) || // For non-extended instruction, no need to allocate extra 4 bytes. 
- (!QII->isExtended(nvjMI) && + (!QII->isExtended(nvjMI) && !ResourceTracker->canReserveResources(nvjMI))) { endPacket(MBB, MI); @@ -3232,7 +3223,7 @@ HexagonPacketizerList::addToPacket(MachineInstr *MI) { CurrentPacketMIs.push_back(MI); CurrentPacketMIs.push_back(nvjMI); } else { - if ( QII->isExtended(MI) + if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) && ( !tryAllocateResourcesForConstExt(MI) || !ResourceTracker->canReserveResources(MI))) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 5f9718b..d4a93b5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -154,6 +154,28 @@ namespace HexagonII { // *** The code above must match HexagonInstrFormat*.td *** // + // Hexagon specific MO operand flag mask. + enum HexagonMOTargetFlagVal { + //===------------------------------------------------------------------===// + // Hexagon Specific MachineOperand flags. + MO_NO_FLAG, + + HMOTF_ConstExtended = 1, + + /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation + /// Used for computing a global address for PIC compilations + MO_PCREL, + + /// MO_GOT - Indicates a GOT-relative relocation + MO_GOT, + + // Low or high part of a symbol. + MO_LO16, MO_HI16, + + // Offset from the base of the SDA. + MO_GPREL + }; + } // End namespace HexagonII. } // End namespace llvm. diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index ad495ff..dda6e24 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + }; + + struct FslImmOp { + const MCExpr *Val; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - unsigned OffReg; - const MCExpr *Off; - } Mem; - - struct { - const MCExpr *Val; - } FslImm; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct FslImmOp FslImm; }; MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 7664c60..d4f9432 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -160,7 +160,8 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) // Operations not directly supported by MBlaze. 
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTR, MVT::i32, Expand); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index d0fd7dc..bd83afc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MBlazeRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { +processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99a2fac..497f386 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ae2e556..e504011 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void -MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index a077dd7..c673f59 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -50,7 +50,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index ab4e64e..e0ed870 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -73,7 +73,7 @@ namespace llvm { public: explicit MSP430TargetLowering(MSP430TargetMachine &TM); - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; } /// LowerOperation - Provide custom lowering hooks for some operations. 
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ade6084..ebe12c9 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -211,25 +211,30 @@ private: MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + const MCExpr *Off; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - RegisterKind Kind; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - const MCExpr *Off; - } Mem; + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; SMLoc StartLoc, EndLoc; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index d6fac0c..cf8bb18 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -16,10 +16,13 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16InstrInfo.cpp + Mips16ISelDAGToDAG.cpp + Mips16ISelLowering.cpp Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp + MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp MipsJITInfo.cpp MipsInstrInfo.cpp @@ -32,6 +35,8 @@ add_llvm_target(MipsCodeGen MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp + MipsSEISelDAGToDAG.cpp + MipsSEISelLowering.cpp MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 2963f7e..8c65bb4 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -27,6 +27,7 @@ namespace llvm { FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM); FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, JITCodeEmitter &JCE); + FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); } // end namespace llvm; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 1326623..eefb02a 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -44,8 +44,6 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; -def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true", - "Target is android">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp new file mode 100644 index 0000000..00b3449 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -0,0 +1,308 @@ +//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "Mips16ISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = Mips::Mflo16; + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = Mips::Mfhi16; + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC = + (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + V2 = RegInfo.createVirtualRegister(RC); + + BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); + BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) + .addReg(V1).addReg(V2); +} + +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + +void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + initMips16SPAliasReg(MF); +} + +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + +void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} + +bool Mips16DAGToDAGISel::selectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. 
Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasFPIdx()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::AdduRxRyRz16; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SubuRxRyRz16; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + + EVT VT = LHS.getValueType(); + + unsigned Sltu_op = Mips::SltuRxRyRz16; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + unsigned Addu_op = Mips::AdduRxRyRz16; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, + SDValue(Carry,0), RHS); + + SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry,0)); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16); + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + case ISD::MULHS: + case ISD::MULHU: { + MultOpc = (Opcode == ISD::MULHU ? 
Mips::MultuRxRy16 : Mips::MultRxRy16); + SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { + return new Mips16DAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h new file mode 100644 index 0000000..baa8587 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -0,0 +1,51 @@ +//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16ISELDAGTODAG_H +#define MIPS16ISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class Mips16DAGToDAGISel : public MipsDAGToDAGISel { +public: + explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, + EVT Ty, bool HasLo, bool HasHi); + + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); + + void initMips16SPAliasReg(MachineFunction &MF); +}; + +FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp new file mode 100644 index 0000000..23eb537 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -0,0 +1,689 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-lower" +#include "Mips16ISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <set> + +using namespace llvm; + +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); + +namespace { + std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded; +} + +Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // + // set up as if mips32 and then revert so we can test the mechanism + // for switching + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + computeRegisterProperties(); + clearRegisterClasses(); + + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMips16TargetLowering(MipsTargetMachine &TM) { + return new Mips16TargetLowering(TM); +} + +bool +Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + return false; +} + +MachineBasicBlock * +Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::SelBeqZ: + return emitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return emitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return emitSelT16(Mips::BteqzX16, 
Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; + case Mips::SltCCRxRy16: + return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return emitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); + } +} + +bool Mips16TargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + // No tail call optimization for mips16. 
+ return false; +} + +void Mips16TargetLowering::setMips16LibcallName + (RTLIB::Libcall L, const char *Name) { + setLibcallName(L, Name); + NoHelperNeeded.insert(Name); +} + +void Mips16TargetLowering::setMips16HardFloatLibCalls() { + setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + + +// +// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much +// cleaner way to do all of this but it will have to wait until the traditional +// gcc mechanism is completed. +// +// For Pic, in order for Mips16 code to call Mips32 code which according the abi +// have either arguments or returned values placed in floating point registers, +// we use a set of helper functions. (This includes functions which return type +// complex which on Mips are returned in a pair of floating point registers). +// +// This is an encoding that we inherited from gcc. +// In Mips traditional O32, N32 ABI, floating point numbers are passed in +// floating point argument registers 1,2 only when the first and optionally +// the second arguments are float (sf) or double (df). +// For Mips16 we are only concerned with the situations where floating point +// arguments are being passed in floating point registers by the ABI, because +// Mips16 mode code cannot execute floating point instructions to load those +// values and hence helper functions are needed. 
+// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) +// the helper function suffixes for these are: +// 0, 1, 5, 9, 2, 6, 10 +// this suffix can then be calculated as follows: +// for a given argument Arg: +// Arg1x, Arg2x = 1 : Arg is sf +// 2 : Arg is df +// 0 : Arg is neither sf nor df +// So this stub is the string for number Arg1x + Arg2x*4. +// However, not all numbers between 0 and 10 are possible; we check anyway and +// assert if an impossible one shows up. +// + +unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber + (ArgListTy &Args) const { + unsigned int resultNum = 0; + if (Args.size() >= 1) { + Type *t = Args[0].Ty; + if (t->isFloatTy()) { + resultNum = 1; + } + else if (t->isDoubleTy()) { + resultNum = 2; + } + } + if (resultNum) { + if (Args.size() >=2) { + Type *t = Args[1].Ty; + if (t->isFloatTy()) { + resultNum += 4; + } + else if (t->isDoubleTy()) { + resultNum += 8; + } + } + } + return resultNum; +} + +// +// prefixes are attached to stub numbers depending on the return type. +// return type: float sf_ +// double df_ +// single complex sc_ +// double complex dc_ +// others NO PREFIX +// +// +// The full name of a helper function is __mips16_call_stub_ + +// return type dependent prefix + stub number +// +// +// This is something that probably should be in a different source file and +// perhaps done differently but my main purpose is to not waste runtime +// on something that we can enumerate in the source. Another possibility is +// to have a Python script generate these mapping tables. This will do +// for now. There is a whole series of helper function mapping arrays, one +// for each return type class as outlined above. There are 11 possible +// entries; ones that are 0 should never be selected. +// +// All the arrays are similar except for the one for functions which return +// none of sf, df, sc, dc; there we only care about calls which have sf or df +// as the first parameter.
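(Aside for readers of this hunk, not part of the patch: the stub-number encoding described in the comment above can be checked with a small self-contained C++ sketch. The names argCode, stubNumber and helperName below are invented purely for illustration; only the Arg1x + Arg2x*4 arithmetic, the valid-number table and the "__mips16_call_stub_" naming scheme are taken from the patch.)

#include <cassert>
#include <cstdio>
#include <string>

// Illustration only: 1 = float (sf) argument, 2 = double (df) argument,
// 0 = anything else, mirroring the Arg1x/Arg2x codes described above.
static unsigned argCode(bool isFloat, bool isDouble) {
  return isFloat ? 1 : (isDouble ? 2 : 0);
}

// Stub number = Arg1x + Arg2x * 4; the second argument only contributes when
// the first one is sf or df. The reachable values are therefore
// 0 (), 1 (sf), 2 (df), 5 (sf,sf), 6 (df,sf), 9 (sf,df) and 10 (df,df).
static unsigned stubNumber(unsigned arg1, unsigned arg2) {
  const bool valid[11] = {true, true, true, false, false,
                          true, true, false, false, true, true};
  unsigned n = arg1 ? arg1 + arg2 * 4 : 0;
  assert(n <= 10 && valid[n] && "impossible stub number");
  return n;
}

// Full helper name = "__mips16_call_stub_" + return-type prefix + number.
static std::string helperName(const std::string &retPrefix, unsigned n) {
  return "__mips16_call_stub_" + retPrefix + std::to_string(n);
}

int main() {
  // A double-returning call with (df, sf) arguments: stub number 2 + 1*4 = 6,
  // so the helper is "__mips16_call_stub_df_6".
  unsigned n = stubNumber(argCode(false, true), argCode(true, false));
  std::printf("%s\n", helperName("df_", n).c_str());
  return 0;
}

Compiled on its own (C++11 for std::to_string), this prints __mips16_call_stub_df_6, which is exactly the entry dfMips16Helper[6] built by the macro table that follows.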
+// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* Mips16TargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + +void Mips16TargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + SelectionDAG &DAG = CLI.DAG; + const char* Mips16HelperFunction = 0; + bool NeedMips16Helper = false; + + if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool LookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) { + if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + LookupHelper = false; + } + } + if (LookupHelper) Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); + + } + + SDValue JumpTarget = Callee; + + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. 
+ if (IsPICCall || !GlobalOrExternal) { + unsigned V0Reg = Mips::V0; + if (NeedMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } else + RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); + } + + Ops.push_back(JumpTarget); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock *Mips16TargetLowering:: +emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... 
+ // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock *Mips16TargetLowering::emitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; + +} + +MachineBasicBlock + *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h new file mode 100644 index 0000000..b23e2a1 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -0,0 +1,80 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef Mips16ISELLOWERING_H +#define Mips16ISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class Mips16TargetLowering : public MipsTargetLowering { + public: + explicit Mips16TargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + void setMips16LibcallName(RTLIB::Libcall, const char *Name); + + void setMips16HardFloatLibCalls(); + + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; +} + +#endif // Mips16ISELLOWERING_H diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index a9e9c52..6293829 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -15,7 +15,7 @@ // Mips Address // def addr16 : - ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>; // // Address operand diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 494ba87..5903b9e 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -313,7 +313,7 @@ def : InstAlias<"move $dst, $src", (DADDu CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>, Requires<[HasMips64]>; def : InstAlias<"move $dst, $src", - (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 0>, + (OR64 CPU64RegsOpnd:$dst, CPU64RegsOpnd:$src, ZERO_64), 1>, Requires<[HasMips64]>; def : InstAlias<"and $rs, $rt, $imm", (DANDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 78cf140..462def7 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -96,6 +96,12 @@ def RetCC_MipsN : CallingConv<[ CCIfType<[f64], 
CCAssignToReg<[D0_64, D2_64]>> ]>; +// In soft-mode, register A0_64, instead of V1_64, is used to return a long +// double value. +def RetCC_F128Soft : CallingConv<[ + CCIfType<[i64], CCAssignToReg<[V0_64, A0_64]>> +]>; + //===----------------------------------------------------------------------===// // Mips EABI Calling Convention //===----------------------------------------------------------------------===// @@ -139,17 +145,6 @@ def RetCC_MipsEABI : CallingConv<[ ]>; //===----------------------------------------------------------------------===// -// Mips Android Calling Convention -//===----------------------------------------------------------------------===// - -def RetCC_MipsAndroid : CallingConv<[ - // f32 are returned in registers F0, F2, F1, F3 - CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>, - - CCDelegateTo<RetCC_MipsO32> -]>; - -//===----------------------------------------------------------------------===// // Mips FastCC Calling Convention //===----------------------------------------------------------------------===// def CC_MipsO32_FastCC : CallingConv<[ @@ -209,7 +204,6 @@ def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, - CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 559370b..42e4c99 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -58,21 +58,23 @@ multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC, Instruction SLTiuOp> { def : MipsPat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : MipsPat< - (select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>; - def : MipsPat< - (select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>; - def : MipsPat< - (select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>; - def : MipsPat< - (select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>; - def : MipsPat< - (select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), - (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>; + def : MipsPat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>; + def : MipsPat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>; + def : MipsPat<(select (i32 (setuge CRC:$lh, immSExt16:$rh)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiuOp CRC:$lh, immSExt16:$rh), DRC:$F)>; + def : MipsPat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>; + def : MipsPat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>; + def : MipsPat<(select (i32 (setgt CRC:$lhs, immSExt16Plus1:$rhs)), + DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, (Plus1 imm:$rhs)), DRC:$F)>; + def : MipsPat<(select (i32 (setugt CRC:$lhs, immSExt16Plus1:$rhs)), + DRC:$T, DRC:$F), + (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, (Plus1 imm:$rhs)), + DRC:$F)>; } multiclass 
MovzPats1<RegisterClass CRC, RegisterClass DRC, diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp new file mode 100644 index 0000000..b5de1eb --- /dev/null +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -0,0 +1,85 @@ +//===-- MipsConstantIslandPass.cpp - Emit PC Relative loads----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// This pass is used to make PC relative loads of constants. +// For now, only Mips16 will use this. While it has the same name and +// uses many ideas from the LLVM ARM Constant Island Pass, it's not intended +// to reuse any of the code from the ARM version. +// +// Loading constants inline is expensive on Mips16 and it's in general better +// to place the constant nearby in code space and then it can be loaded with a +// simple 16-bit load instruction. +// +// The constants can be not just numbers but also addresses of functions and labels. +// This can be particularly helpful in static relocation mode for embedded +// non-Linux targets. +// +// + +#define DEBUG_TYPE "mips-constant-islands" + +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace { + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; + + class MipsConstantIslands : public MachineFunctionPass { + + public: + static char ID; + MipsConstantIslands(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), + TII(static_cast<const MipsInstrInfo*>(tm.getInstrInfo())), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), + ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()) {} + + virtual const char *getPassName() const { + return "Mips Constant Islands"; + } + + bool runOnMachineFunction(MachineFunction &F); + + private: + + + const TargetMachine &TM; + const MipsInstrInfo *TII; + bool IsPIC; + unsigned ABI; + + }; + + char MipsConstantIslands::ID = 0; +} // end of anonymous namespace + +/// createMipsConstantIslandPass - Returns a pass that emits PC relative +/// loads of constants.
+FunctionPass *llvm::createMipsConstantIslandPass(MipsTargetMachine &tm) { + return new MipsConstantIslands(tm); +} + +bool MipsConstantIslands::runOnMachineFunction(MachineFunction &F) { + return true; +} + diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index d62b166..e265590 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -14,11 +14,17 @@ #define DEBUG_TYPE "delay-slot-filler" #include "Mips.h" +#include "MipsInstrInfo.h" #include "MipsTargetMachine.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -36,18 +42,59 @@ static cl::opt<bool> DisableDelaySlotFiller( cl::desc("Fill all delay slots with NOPs."), cl::Hidden); -// This option can be used to silence complaints by machine verifier passes. -static cl::opt<bool> SkipDelaySlotFiller( - "skip-mips-delay-filler", +static cl::opt<bool> DisableForwardSearch( + "disable-mips-df-forward-search", + cl::init(true), + cl::desc("Disallow MIPS delay filler to search forward."), + cl::Hidden); + +static cl::opt<bool> DisableSuccBBSearch( + "disable-mips-df-succbb-search", + cl::init(true), + cl::desc("Disallow MIPS delay filler to search successor basic blocks."), + cl::Hidden); + +static cl::opt<bool> DisableBackwardSearch( + "disable-mips-df-backward-search", cl::init(false), - cl::desc("Skip MIPS' delay slot filling pass."), + cl::desc("Disallow MIPS delay filler to search backward."), cl::Hidden); namespace { + typedef MachineBasicBlock::iterator Iter; + typedef MachineBasicBlock::reverse_iterator ReverseIter; + typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap; + + /// \brief A functor comparing edge weight of two blocks. + struct CmpWeight { + CmpWeight(const MachineBasicBlock &S, + const MachineBranchProbabilityInfo &P) : Src(S), Prob(P) {} + + bool operator()(const MachineBasicBlock *Dst0, + const MachineBasicBlock *Dst1) const { + return Prob.getEdgeWeight(&Src, Dst0) < Prob.getEdgeWeight(&Src, Dst1); + } + + const MachineBasicBlock &Src; + const MachineBranchProbabilityInfo &Prob; + }; + class RegDefsUses { public: RegDefsUses(TargetMachine &TM); void init(const MachineInstr &MI); + + /// This function sets all caller-saved registers in Defs. + void setCallerSaved(const MachineInstr &MI); + + /// This function sets all unallocatable registers in Defs. + void setUnallocatableRegs(const MachineFunction &MF); + + /// Set bits in Uses corresponding to MBB's live-out registers except for + /// the registers that are live-in to SuccBB. + void addLiveOut(const MachineBasicBlock &MBB, + const MachineBasicBlock &SuccBB); + bool update(const MachineInstr &MI, unsigned Begin, unsigned End); private: @@ -61,6 +108,72 @@ namespace { BitVector Defs, Uses; }; + /// Base class for inspecting loads and stores. + class InspectMemInstr { + public: + InspectMemInstr(bool ForbidMemInstr_) + : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false), + SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {} + + /// Return true if MI cannot be moved to delay slot. 
+ bool hasHazard(const MachineInstr &MI); + + virtual ~InspectMemInstr() {} + + protected: + /// Flags indicating whether loads or stores have been seen. + bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore; + + /// Memory instructions are not allowed to move to delay slot if this flag + /// is true. + bool ForbidMemInstr; + + private: + virtual bool hasHazard_(const MachineInstr &MI) = 0; + }; + + /// This subclass rejects any memory instructions. + class NoMemInstr : public InspectMemInstr { + public: + NoMemInstr() : InspectMemInstr(true) {} + private: + virtual bool hasHazard_(const MachineInstr &MI) { return true; } + }; + + /// This subclass accepts loads from stacks and constant loads. + class LoadFromStackOrConst : public InspectMemInstr { + public: + LoadFromStackOrConst() : InspectMemInstr(false) {} + private: + virtual bool hasHazard_(const MachineInstr &MI); + }; + + /// This subclass uses memory dependence information to determine whether a + /// memory instruction can be moved to a delay slot. + class MemDefsUses : public InspectMemInstr { + public: + MemDefsUses(const MachineFrameInfo *MFI); + + private: + virtual bool hasHazard_(const MachineInstr &MI); + + /// Update Defs and Uses. Return true if there exist dependences that + /// disqualify the delay slot candidate between V and values in Uses and + /// Defs. + bool updateDefsUses(const Value *V, bool MayStore); + + /// Get the list of underlying objects of MI's memory operand. + bool getUnderlyingObjects(const MachineInstr &MI, + SmallVectorImpl<const Value *> &Objects) const; + + const MachineFrameInfo *MFI; + SmallPtrSet<const Value*, 4> Uses, Defs; + + /// Flags indicating whether loads or stores with no underlying objects have + /// been seen. + bool SeenNoObjLoad, SeenNoObjStore; + }; + class Filler : public MachineFunctionPass { public: Filler(TargetMachine &tm) @@ -71,9 +184,6 @@ namespace { } bool runOnMachineFunction(MachineFunction &F) { - if (SkipDelaySlotFiller) - return false; - bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) @@ -81,19 +191,54 @@ namespace { return Changed; } - private: - typedef MachineBasicBlock::iterator Iter; - typedef MachineBasicBlock::reverse_iterator ReverseIter; + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + private: bool runOnMachineBasicBlock(MachineBasicBlock &MBB); /// This function checks if it is valid to move Candidate to the delay slot - /// and returns true if it isn't. It also updates load and store flags and - /// register defs and uses. - bool delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, RegDefsUses &RegDU) const; - - bool findDelayInstr(MachineBasicBlock &MBB, Iter slot, Iter &Filler) const; + /// and returns true if it isn't. It also updates memory and register + /// dependence information. + bool delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, + InspectMemInstr &IM) const; + + /// This function searches range [Begin, End) for an instruction that can be + /// moved to the delay slot. Returns true on success. + template<typename IterTy> + bool searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, + RegDefsUses &RegDU, InspectMemInstr &IM, + IterTy &Filler) const; + + /// This function searches in the backward direction for an instruction that + /// can be moved to the delay slot. Returns true on success. 
+ bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const; + + /// This function searches MBB in the forward direction for an instruction + /// that can be moved to the delay slot. Returns true on success. + bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; + + /// This function searches MBB's successor blocks for an instruction that + /// can be moved to the delay slot and inserts clones of the instruction + /// into the successor blocks. + bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; + + /// Pick a successor block of MBB. Return NULL if MBB doesn't have a + /// successor block that is not a landing pad. + MachineBasicBlock *selectSuccBB(MachineBasicBlock &B) const; + + /// This function analyzes MBB and returns an instruction with an unoccupied + /// slot that branches to Dst. + std::pair<MipsInstrInfo::BranchType, MachineInstr *> + getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const; + + /// Examine Pred and see if it is possible to insert an instruction into + /// one of its branches delay slot or its end. + bool examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, + RegDefsUses &RegDU, bool &HasMultipleSuccs, + BB2BrMap &BrMap) const; bool terminateSearch(const MachineInstr &Candidate) const; @@ -105,6 +250,45 @@ namespace { char Filler::ID = 0; } // end of anonymous namespace +static bool hasUnoccupiedSlot(const MachineInstr *MI) { + return MI->hasDelaySlot() && !MI->isBundledWithSucc(); +} + +/// This function inserts clones of Filler into predecessor blocks. +static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) { + MachineFunction *MF = Filler->getParent()->getParent(); + + for (BB2BrMap::const_iterator I = BrMap.begin(); I != BrMap.end(); ++I) { + if (I->second) { + MIBundleBuilder(I->second).append(MF->CloneMachineInstr(&*Filler)); + ++UsefulSlots; + } else { + I->first->insert(I->first->end(), MF->CloneMachineInstr(&*Filler)); + } + } +} + +/// This function adds registers Filler defines to MBB's live-in register list. +static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) { + for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = Filler->getOperand(I); + unsigned R; + + if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg())) + continue; + +#ifndef NDEBUG + const MachineFunction &MF = *MBB.getParent(); + assert(MF.getTarget().getRegisterInfo()->getAllocatableSet(MF).test(R) && + "Shouldn't move an instruction with unallocatable registers across " + "basic block boundaries."); +#endif + + if (!MBB.isLiveIn(R)) + MBB.addLiveIn(R); + } +} + RegDefsUses::RegDefsUses(TargetMachine &TM) : TRI(*TM.getRegisterInfo()), Defs(TRI.getNumRegs(), false), Uses(TRI.getNumRegs(), false) {} @@ -126,6 +310,45 @@ void RegDefsUses::init(const MachineInstr &MI) { } } +void RegDefsUses::setCallerSaved(const MachineInstr &MI) { + assert(MI.isCall()); + + // If MI is a call, add all caller-saved registers to Defs. 
+ BitVector CallerSavedRegs(TRI.getNumRegs(), true); + + CallerSavedRegs.reset(Mips::ZERO); + CallerSavedRegs.reset(Mips::ZERO_64); + + for (const MCPhysReg *R = TRI.getCalleeSavedRegs(); *R; ++R) + for (MCRegAliasIterator AI(*R, &TRI, true); AI.isValid(); ++AI) + CallerSavedRegs.reset(*AI); + + Defs |= CallerSavedRegs; +} + +void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) { + BitVector AllocSet = TRI.getAllocatableSet(MF); + + for (int R = AllocSet.find_first(); R != -1; R = AllocSet.find_next(R)) + for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI) + AllocSet.set(*AI); + + AllocSet.set(Mips::ZERO); + AllocSet.set(Mips::ZERO_64); + + Defs |= AllocSet.flip(); +} + +void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB, + const MachineBasicBlock &SuccBB) { + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + if (*SI != &SuccBB) + for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(), + LE = (*SI)->livein_end(); LI != LE; ++LI) + Uses.set(*LI); +} + bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) { BitVector NewDefs(TRI.getNumRegs()), NewUses(TRI.getNumRegs()); bool HasHazard = false; @@ -164,28 +387,134 @@ bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const { return false; } +bool InspectMemInstr::hasHazard(const MachineInstr &MI) { + if (!MI.mayStore() && !MI.mayLoad()) + return false; + + if (ForbidMemInstr) + return true; + + OrigSeenLoad = SeenLoad; + OrigSeenStore = SeenStore; + SeenLoad |= MI.mayLoad(); + SeenStore |= MI.mayStore(); + + // If MI is an ordered or volatile memory reference, disallow moving + // subsequent loads and stores to delay slot. + if (MI.hasOrderedMemoryRef() && (OrigSeenLoad || OrigSeenStore)) { + ForbidMemInstr = true; + return true; + } + + return hasHazard_(MI); +} + +bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { + if (MI.mayStore()) + return true; + + if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue()) + return true; + + const Value *V = (*MI.memoperands_begin())->getValue(); + + if (isa<FixedStackPseudoSourceValue>(V)) + return false; + + if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V)) + return !PSV->PseudoSourceValue::isConstant(0) && + (V != PseudoSourceValue::getStack()); + + return true; +} + +MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) + : InspectMemInstr(false), MFI(MFI_), SeenNoObjLoad(false), + SeenNoObjStore(false) {} + +bool MemDefsUses::hasHazard_(const MachineInstr &MI) { + bool HasHazard = false; + SmallVector<const Value *, 4> Objs; + + // Check underlying object list. + if (getUnderlyingObjects(MI, Objs)) { + for (SmallVector<const Value *, 4>::const_iterator I = Objs.begin(); + I != Objs.end(); ++I) + HasHazard |= updateDefsUses(*I, MI.mayStore()); + + return HasHazard; + } + + // No underlying objects found. 
+ HasHazard = MI.mayStore() && (OrigSeenLoad || OrigSeenStore); + HasHazard |= MI.mayLoad() || OrigSeenStore; + + SeenNoObjLoad |= MI.mayLoad(); + SeenNoObjStore |= MI.mayStore(); + + return HasHazard; +} + +bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) { + if (MayStore) + return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad; + + Uses.insert(V); + return Defs.count(V) || SeenNoObjStore; +} + +bool MemDefsUses:: +getUnderlyingObjects(const MachineInstr &MI, + SmallVectorImpl<const Value *> &Objects) const { + if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue()) + return false; + + const Value *V = (*MI.memoperands_begin())->getValue(); + + SmallVector<Value *, 4> Objs; + GetUnderlyingObjects(const_cast<Value *>(V), Objs); + + for (SmallVector<Value*, 4>::iterator I = Objs.begin(), E = Objs.end(); + I != E; ++I) { + if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) { + if (PSV->isAliased(MFI)) + return false; + } else if (!isIdentifiedObject(V)) + return false; + + Objects.push_back(*I); + } + + return true; +} + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { bool Changed = false; for (Iter I = MBB.begin(); I != MBB.end(); ++I) { - if (!I->hasDelaySlot()) + if (!hasUnoccupiedSlot(&*I)) continue; ++FilledSlots; Changed = true; - Iter D; // Delay slot filling is disabled at -O0. - if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None) && - findDelayInstr(MBB, I, D)) { - MBB.splice(llvm::next(I), &MBB, D); - ++UsefulSlots; - } else - BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); + if (!DisableDelaySlotFiller && (TM.getOptLevel() != CodeGenOpt::None)) { + if (searchBackward(MBB, I)) + continue; + + if (I->isTerminator()) { + if (searchSuccBBs(MBB, I)) + continue; + } else if (searchForward(MBB, I)) { + continue; + } + } - // Bundle the delay slot filler to the instruction with the delay slot. + // Bundle the NOP to the instruction with the delay slot. 
+ BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP)); MIBundleBuilder(MBB, I, llvm::next(llvm::next(I))); } @@ -198,16 +527,11 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } -bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, - Iter &Filler) const { - RegDefsUses RegDU(TM); - - RegDU.init(*Slot); - - bool SawLoad = false; - bool SawStore = false; - - for (ReverseIter I(Slot); I != MBB.rend(); ++I) { +template<typename IterTy> +bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End, + RegDefsUses &RegDU, InspectMemInstr& IM, + IterTy &Filler) const { + for (IterTy I = Begin; I != End; ++I) { // skip debug value if (I->isDebugValue()) continue; @@ -215,33 +539,176 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot, if (terminateSearch(*I)) break; - if (delayHasHazard(*I, SawLoad, SawStore, RegDU)) + assert((!I->isCall() && !I->isReturn() && !I->isBranch()) && + "Cannot put calls, returns or branches in delay slot."); + + if (delayHasHazard(*I, RegDU, IM)) continue; - Filler = llvm::next(I).base(); + Filler = I; return true; } return false; } -bool Filler::delayHasHazard(const MachineInstr &Candidate, bool &SawLoad, - bool &SawStore, RegDefsUses &RegDU) const { - bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); +bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const { + if (DisableBackwardSearch) + return false; + + RegDefsUses RegDU(TM); + MemDefsUses MemDU(MBB.getParent()->getFrameInfo()); + ReverseIter Filler; + + RegDU.init(*Slot); - // Loads or stores cannot be moved past a store to the delay slot - // and stores cannot be moved past a load. - if (Candidate.mayStore() || Candidate.hasOrderedMemoryRef()) { - HasHazard |= SawStore | SawLoad; - SawStore = true; - } else if (Candidate.mayLoad()) { - HasHazard |= SawStore; - SawLoad = true; + if (searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Filler)) { + MBB.splice(llvm::next(Slot), &MBB, llvm::next(Filler).base()); + MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot))); + ++UsefulSlots; + return true; } - assert((!Candidate.isCall() && !Candidate.isReturn()) && - "Cannot put calls or returns in delay slot."); + return false; +} + +bool Filler::searchForward(MachineBasicBlock &MBB, Iter Slot) const { + // Can handle only calls. + if (DisableForwardSearch || !Slot->isCall()) + return false; + + RegDefsUses RegDU(TM); + NoMemInstr NM; + Iter Filler; + + RegDU.setCallerSaved(*Slot); + + if (searchRange(MBB, llvm::next(Slot), MBB.end(), RegDU, NM, Filler)) { + MBB.splice(llvm::next(Slot), &MBB, Filler); + MIBundleBuilder(MBB, Slot, llvm::next(llvm::next(Slot))); + ++UsefulSlots; + return true; + } + + return false; +} + +bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { + if (DisableSuccBBSearch) + return false; + + MachineBasicBlock *SuccBB = selectSuccBB(MBB); + + if (!SuccBB) + return false; + + RegDefsUses RegDU(TM); + bool HasMultipleSuccs = false; + BB2BrMap BrMap; + OwningPtr<InspectMemInstr> IM; + Iter Filler; + + // Iterate over SuccBB's predecessor list. + for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(), + PE = SuccBB->pred_end(); PI != PE; ++PI) + if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap)) + return false; + + // Do not allow moving instructions which have unallocatable register operands + // across basic block boundaries. 
+ RegDU.setUnallocatableRegs(*MBB.getParent()); + + // Only allow moving loads from stack or constants if any of the SuccBB's + // predecessors have multiple successors. + if (HasMultipleSuccs) { + IM.reset(new LoadFromStackOrConst()); + } else { + const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo(); + IM.reset(new MemDefsUses(MFI)); + } + + if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Filler)) + return false; + + insertDelayFiller(Filler, BrMap); + addLiveInRegs(Filler, *SuccBB); + Filler->eraseFromParent(); + + return true; +} + +MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const { + if (B.succ_empty()) + return NULL; + + // Select the successor with the larget edge weight. + CmpWeight Cmp(B, getAnalysis<MachineBranchProbabilityInfo>()); + MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(), Cmp); + return S->isLandingPad() ? NULL : S; +} + +std::pair<MipsInstrInfo::BranchType, MachineInstr *> +Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { + const MipsInstrInfo *TII = + static_cast<const MipsInstrInfo*>(TM.getInstrInfo()); + MachineBasicBlock *TrueBB = 0, *FalseBB = 0; + SmallVector<MachineInstr*, 2> BranchInstrs; + SmallVector<MachineOperand, 2> Cond; + + MipsInstrInfo::BranchType R = + TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs); + + if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) + return std::make_pair(R, (MachineInstr*)NULL); + + if (R != MipsInstrInfo::BT_CondUncond) { + if (!hasUnoccupiedSlot(BranchInstrs[0])) + return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); + + assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); + + return std::make_pair(R, BranchInstrs[0]); + } + + assert((TrueBB == &Dst) || (FalseBB == &Dst)); + + // Examine the conditional branch. See if its slot is occupied. + if (hasUnoccupiedSlot(BranchInstrs[0])) + return std::make_pair(MipsInstrInfo::BT_Cond, BranchInstrs[0]); + + // If that fails, try the unconditional branch. + if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst)) + return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]); + + return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); +} + +bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, + RegDefsUses &RegDU, bool &HasMultipleSuccs, + BB2BrMap &BrMap) const { + std::pair<MipsInstrInfo::BranchType, MachineInstr *> P = + getBranch(Pred, Succ); + + // Return if either getBranch wasn't able to analyze the branches or there + // were no branches with unoccupied slots. + if (P.first == MipsInstrInfo::BT_None) + return false; + + if ((P.first != MipsInstrInfo::BT_Uncond) && + (P.first != MipsInstrInfo::BT_NoBranch)) { + HasMultipleSuccs = true; + RegDU.addLiveOut(Pred, Succ); + } + + BrMap[&Pred] = P.second; + return true; +} + +bool Filler::delayHasHazard(const MachineInstr &Candidate, RegDefsUses &RegDU, + InspectMemInstr &IM) const { + bool HasHazard = (Candidate.isImplicitDef() || Candidate.isKill()); + HasHazard |= IM.hasHazard(Candidate); HasHazard |= RegDU.update(Candidate, 0, Candidate.getNumOperands()); return HasHazard; diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index df52d92..14268d2 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -39,7 +39,7 @@ protected: uint64_t estimateStackSize(const MachineFunction &MF) const; }; -/// Create MipsInstrInfo objects. 
+/// Create MipsFrameLowering objects. const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST); const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 78c74ef..77b08cb 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-isel" +#include "MipsISelDAGToDAG.h" +#include "Mips16ISelDAGToDAG.h" +#include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -45,270 +45,11 @@ using namespace llvm; // MipsDAGToDAGISel - MIPS specific code to select MIPS machine // instructions for SelectionDAG operations. //===----------------------------------------------------------------------===// -namespace { - -class MipsDAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to MipsTargetMachine. - MipsTargetMachine &TM; - - /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can - /// make the right decision when generating code for different targets. - const MipsSubtarget &Subtarget; - -public: - explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - - // Pass Name - virtual const char *getPassName() const { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - -private: - // Include the pieces autogenerated from the target description. - #include "MipsGenDAGISel.inc" - - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const MipsTargetMachine &getTargetMachine() { - return static_cast<const MipsTargetMachine &>(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const MipsInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - - SDValue getMips16SPAliasReg(); - - void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); - - std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, - EVT Ty, bool HasLo, bool HasHi); - - SDNode *Select(SDNode *N); - - // Complex Pattern. - /// (reg + imm). - bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Fall back on this function if all else fails. - bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Match integer address pattern. - bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Alias); - - // getImm - Return a target constant with the specified value. 
- inline SDValue getImm(const SDNode *Node, unsigned Imm) { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); - } - - void ProcessFunctionAfterISel(MachineFunction &MF); - bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); - void InitGlobalBaseReg(MachineFunction &MF); - void InitMips16SPAliasReg(MachineFunction &MF); - - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); -}; - -} - -// Insert instructions to initialize the global base register in the -// first MBB of the function. When the ABI is O32 and the relocation model is -// PIC, the necessary instructions are emitted later to prevent optimization -// passes from moving them. -void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->globalBaseRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - const TargetRegisterClass *RC; - - if (Subtarget.isABI_N64()) - RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - else if (Subtarget.inMips16Mode()) - RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; - else - RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - V0 = RegInfo.createVirtualRegister(RC); - V1 = RegInfo.createVirtualRegister(RC); - V2 = RegInfo.createVirtualRegister(RC); - - if (Subtarget.isABI_N64()) { - MF.getRegInfo().addLiveIn(Mips::T9_64); - MBB.addLiveIn(Mips::T9_64); - - // lui $v0, %hi(%neg(%gp_rel(fname))) - // daddu $v1, $v0, $t9 - // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) - .addReg(Mips::T9_64); - BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - if (Subtarget.inMips16Mode()) { - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); - BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) - .addReg(V1).addReg(V2); - return; - } - - if (MF.getTarget().getRelocationModel() == Reloc::Static) { - // Set global register to __gnu_local_gp. 
- // - // lui $v0, %hi(__gnu_local_gp) - // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - return; - } - - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - assert(Subtarget.isABI_O32()); - - // For O32 ABI, the following instruction sequence is emitted to initialize - // the global base register: - // - // 0. lui $2, %hi(_gp_disp) - // 1. addiu $2, $2, %lo(_gp_disp) - // 2. addu $globalbasereg, $2, $t9 - // - // We emit only the last instruction here. - // - // GNU linker requires that the first two instructions appear at the beginning - // of a function and no instructions be inserted before or between them. - // The two instructions are emitted during lowering to MC layer in order to - // avoid any reordering. - // - // Register $2 (Mips::V0) is added to the list of live-in registers to ensure - // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) - // reads it. - MF.getRegInfo().addLiveIn(Mips::V0); - MBB.addLiveIn(Mips::V0); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) - .addReg(Mips::V0).addReg(Mips::T9); -} - -// Insert instructions to initialize the Mips16 SP Alias register in the -// first MBB of the function. -// -void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->mips16SPAliasRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); - - BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) - .addReg(Mips::SP); -} - - -bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, - const MachineInstr& MI) { - unsigned DstReg = 0, ZeroReg = 0; - - // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". - if ((MI.getOpcode() == Mips::ADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO; - } else if ((MI.getOpcode() == Mips::DADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO_64) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO_64; - } - - if (!DstReg) - return false; - - // Replace uses with ZeroReg. - for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E;) { - MachineOperand &MO = U.getOperand(); - unsigned OpNo = U.getOperandNo(); - MachineInstr *MI = MO.getParent(); - ++U; - - // Do not replace if it is a phi's operand or is tied to def operand. 
- if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) - continue; - - MO.setReg(ZeroReg); - } - - return true; -} - -void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { - InitGlobalBaseReg(MF); - InitMips16SPAliasReg(MF); - - MachineRegisterInfo *MRI = &MF.getRegInfo(); - - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; - ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - ReplaceUsesWithZeroReg(MRI, *I); -} bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - ProcessFunctionAfterISel(MF); + processFunctionAfterISel(MF); return Ret; } @@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// getMips16SPAliasReg - Output the instructions required to put the -/// SP into a Mips16 accessible aliased register. -SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { - unsigned Mips16SPAliasReg = - MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); -} - /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { - EVT ValTy = Addr.getValueType(); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; - } - - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. 
Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - } - + llvm_unreachable("Unimplemented function."); return false; } bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); - return true; + llvm_unreachable("Unimplemented function."); + return false; } bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Addr, Base, Offset) || - selectAddrDefault(Addr, Base, Offset); -} - -void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); - if (Parent) { - switch (Parent->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - case ISD::STORE: { - StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - } - } - AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); - return; - -} -bool MipsDAGToDAGISel::SelectAddr16( - SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, - SDValue &Alias) { - EVT ValTy = Addr.getValueType(); - - Alias = CurDAG->getTargetConstant(0, ValTy); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - getMips16SPRefReg(Parent, Alias); - return true; - } - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - getMips16SPRefReg(Parent, Alias); - } - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. 
Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; + llvm_unreachable("Unimplemented function."); + return false; } -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 : - (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 : - (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag); - } - return std::make_pair(Lo, Hi); +bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) { + llvm_unreachable("Unimplemented function."); + return false; } - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return NULL; } - /// - // Instruction Selection not handled by the auto-generated - // tablegen selection should be handled here. - /// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; + // See if subclasses can handle this node. + std::pair<bool, SDNode*> Ret = selectNode(Node); + + if (Ret.first) + return Ret.second; switch(Opcode) { default: break; - case ISD::SUBE: - case ISD::ADDE: { - bool inMips16Mode = Subtarget.inMips16Mode(); - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - if (inMips16Mode) - MOp = Mips::AdduRxRyRz16; - else - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - if (inMips16Mode) - MOp = Mips::SubuRxRyRz16; - else - MOp = Mips::SUBu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - - EVT VT = LHS.getValueType(); - - unsigned Sltu_op = inMips16Mode? 
Mips::SltuRxRyRz16: Mips::SLTu; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); - unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; - SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, - SDValue(Carry,0), RHS); - - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } - - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : - Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return NULL; - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, - dl, NodeTy, true, false).first; - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::MULHU ? - Mips::MultuRxRy16 : Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; - } - // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.hasMips64()) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO_64, MVT::i64); - return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); - } - - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } - break; - } - - case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); - unsigned Size = CN->getValueSizeInBits(0); - - if (Size == 32) - break; - - MipsAnalyzeImmediate AnalyzeImm; - int64_t Imm = CN->getSExtValue(); - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, false); - - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - DebugLoc DL = CN->getDebugLoc(); - SDNode *RegOpnd; - SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - - // The first instruction can be a LUi which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == Mips::LUi64) - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); - else - RegOpnd = - CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - CurDAG->getRegister(Mips::ZERO_64, MVT::i64), - ImmOpnd); - - // The remaining instructions in the sequence are handled here. 
- for (++Inst; Inst != Seq.end(); ++Inst) { - ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - SDValue(RegOpnd, 0), ImmOpnd); - } - - return RegOpnd; - } - #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: @@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { "Unexpected unaligned loads/stores."); break; #endif - - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); - } } // Select the default instruction @@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { - return new MipsDAGToDAGISel(TM); + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16ISelDag(TM); + + return llvm::createMipsSEISelDag(TM); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h new file mode 100644 index 0000000..cf0f9c5 --- /dev/null +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -0,0 +1,93 @@ +//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSISELDAGTODAG_H +#define MIPSISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsDAGToDAGISel : public SelectionDAGISel { +public: + explicit MipsDAGToDAGISel(MipsTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + +protected: + SDNode *getGlobalBaseReg(); + + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. 
+ const MipsSubtarget &Subtarget; + +private: + // Include the pieces autogenerated from the target description. + #include "MipsGenDAGISel.inc" + + // Complex Pattern. + /// (reg + imm). + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual SDNode *Select(SDNode *N); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0; + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + virtual void processFunctionAfterISel(MachineFunction &MF) = 0; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. +FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 36e1a15..4bf43f4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" -#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -30,7 +29,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" @@ -43,26 +41,9 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); - -static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); -static cl::opt<bool> -Mips16HardFloat("mips16-hard-float", cl::NotHidden, - cl::desc("MIPS: mips16 hard float enable."), - cl::init(false)); - -static cl::opt<bool> DontExpandCondPseudos16( - "mips16-dont-expand-cond-pseudo", - cl::init(false), - cl::desc("Dont expand conditional move related " - "pseudos for Mips 16"), - cl::Hidden); - - static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -80,7 +61,7 @@ static const uint16_t Mips64DPRegs[8] = { // If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). 
-static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { +static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { if (!isShiftedMask_64(I)) return false; @@ -89,7 +70,7 @@ static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } -static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) { +SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } @@ -124,11 +105,12 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, + bool HasMips64) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; - SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty), + SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(Op, DAG, GOTFlag)); SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, MachinePointerInfo::getGOT(), false, false, false, @@ -138,21 +120,23 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, + unsigned Flag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); - SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, GetGlobalReg(DAG, Ty), + SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(Op, DAG, Flag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt, MachinePointerInfo::getGOT(), false, false, false, 0); } -static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) { +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, + unsigned LoFlag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); - Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, GetGlobalReg(DAG, Ty)); + Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty)); SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi, getTargetNode(Op, DAG, LoFlag)); return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper, @@ -204,7 +188,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::MTHLIP: return "MipsISD::MTHLIP"; case MipsISD::MULT: return "MipsISD::MULT"; case MipsISD::MULTU: return "MipsISD::MULTU"; - case MipsISD::MADD_DSP: return "MipsISD::MADD_DSPDSP"; + case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP"; case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; @@ -212,110 +196,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -namespace { - struct ltstr { - bool operator()(const char *s1, const char *s2) const - { - return strcmp(s1, s2) < 0; - } - }; - - std::set<const char*, ltstr> noHelperNeeded; -} - -void MipsTargetLowering::SetMips16LibcallName - (RTLIB::Libcall l, const char *Name) { - setLibcallName(l, Name); - noHelperNeeded.insert(Name); -} 
- -void MipsTargetLowering::setMips16HardFloatLibCalls() { - SetMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - SetMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - SetMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - SetMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - SetMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - SetMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - SetMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - SetMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - SetMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - SetMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - SetMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - SetMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - SetMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - SetMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - SetMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - SetMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - SetMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - SetMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - SetMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - SetMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - SetMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - SetMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - SetMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - SetMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - SetMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - SetMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - SetMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - SetMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - SetMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - SetMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - SetMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - SetMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} - MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget<MipsSubtarget>()), HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), IsO32(Subtarget->isABI_O32()) { - // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - // Set up the register classes - addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); - - if (HasMips64) - addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); - - if (Subtarget->inMips16Mode()) { - addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Mips16HardFloat) - setMips16HardFloatLibCalls(); - } - - if (Subtarget->hasDSP()) { - MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; - - for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); - - // Expand all builtin opcodes. 
- for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, VecTys[i], Expand); - - setOperationAction(ISD::LOAD, VecTys[i], Legal); - setOperationAction(ISD::STORE, VecTys[i], Legal); - setOperationAction(ISD::BITCAST, VecTys[i], Legal); - } - } - - if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, &Mips::FGR64RegClass); - else - addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); - } - } - // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -332,6 +223,7 @@ MipsTargetLowering(MipsTargetMachine &TM) AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); // Mips Custom Operations + setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); @@ -348,18 +240,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } - else { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - } - if (!Subtarget->inMips16Mode()) { - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - } if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -397,8 +277,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. 
- setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); @@ -470,21 +352,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - } - setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -521,7 +388,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); - computeRegisterProperties(); setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); @@ -529,22 +395,11 @@ MipsTargetLowering(MipsTargetMachine &TM) MaxStoresPerMemcpy = 16; } -bool -MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { - MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - - if (Subtarget->inMips16Mode()) - return false; +const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16TargetLowering(TM); - switch (SVT) { - case MVT::i64: - case MVT::i32: - if (Fast) - *Fast = true; - return true; - default: - return false; - } + return llvm::createMipsSETargetLowering(TM); } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -553,7 +408,7 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -// SelectMadd - +// selectMADD - // Transforms a subgraph in CurDAG if the following pattern is found: // (addc multLo, Lo0), (adde multHi, Hi0), // where, @@ -561,7 +416,7 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { +static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. 
SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); @@ -598,21 +453,21 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { return false; SDValue Chain = CurDAG->getEntryNode(); - DebugLoc dl = ADDENode->getDebugLoc(); + DebugLoc DL = ADDENode->getDebugLoc(); // create MipsMAdd(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue, + SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 ADDCNode->getOperand(1),// Lo0 ADDENode->getOperand(1));// Hi0 // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32, + SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, MAdd); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl, + SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, Mips::HI, MVT::i32, CopyFromLo.getValue(2)); @@ -626,7 +481,7 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { return true; } -// SelectMsub - +// selectMSUB - // Transforms a subgraph in CurDAG if the following pattern is found: // (addc Lo0, multLo), (sube Hi0, multHi), // where, @@ -634,7 +489,7 @@ static bool SelectMadd(SDNode *ADDENode, SelectionDAG *CurDAG) { // Lo0: initial value of Lo register // Hi0: initial value of Hi register // Return true if pattern matching was successful. -static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { +static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); @@ -671,21 +526,21 @@ static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { return false; SDValue Chain = CurDAG->getEntryNode(); - DebugLoc dl = SUBENode->getDebugLoc(); + DebugLoc DL = SUBENode->getDebugLoc(); // create MipsSub(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? 
MipsISD::MSubu : MipsISD::MSub; - SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue, + SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 SUBCNode->getOperand(0),// Lo0 SUBENode->getOperand(0));// Hi0 // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32, + SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, MSub); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl, + SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, Mips::HI, MVT::i32, CopyFromLo.getValue(2)); @@ -699,33 +554,33 @@ static bool SelectMsub(SDNode *SUBENode, SelectionDAG *CurDAG) { return true; } -static SDValue PerformADDECombine(SDNode *N, SelectionDAG &DAG, +static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - SelectMadd(N, &DAG)) + selectMADD(N, &DAG)) return SDValue(N, 0); return SDValue(); } -static SDValue PerformSUBECombine(SDNode *N, SelectionDAG &DAG, +static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalize()) return SDValue(); if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - SelectMsub(N, &DAG)) + selectMSUB(N, &DAG)) return SDValue(N, 0); return SDValue(); } -static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) @@ -734,18 +589,18 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, EVT Ty = N->getValueType(0); unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; - unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : + unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : MipsISD::DivRemU; - DebugLoc dl = N->getDebugLoc(); + DebugLoc DL = N->getDebugLoc(); - SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue, + SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue, N->getOperand(0), N->getOperand(1)); SDValue InChain = DAG.getEntryNode(); SDValue InGlue = DivRem; // insert MFLO if (N->hasAnyUseOfValue(0)) { - SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, LO, Ty, + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); @@ -754,7 +609,7 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG &DAG, // insert MFHI if (N->hasAnyUseOfValue(1)) { - SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, + SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL, HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } @@ -790,7 +645,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { // Returns true if condition code has to be inverted. -static bool InvertFPCondCode(Mips::CondCode CC) { +static bool invertFPCondCode(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; @@ -802,7 +657,7 @@ static bool InvertFPCondCode(Mips::CondCode CC) { // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. 
-static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) { +static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; @@ -813,20 +668,20 @@ static SDValue CreateFPCmp(SelectionDAG &DAG, const SDValue &Op) { return Op; SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of // node if necessary. ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS, + return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS, DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); } // Creates and returns a CMovFPT/F node. -static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, +static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { - bool invert = InvertFPCondCode((Mips::CondCode) + bool invert = invertFPCondCode((Mips::CondCode) cast<ConstantSDNode>(Cond.getOperand(2)) ->getSExtValue()); @@ -834,7 +689,7 @@ static SDValue CreateCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, True.getValueType(), True, False, Cond); } -static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) @@ -867,7 +722,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True); } -static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { // Pattern match EXT. @@ -893,7 +748,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, // Op's second operand must be a shifted mask. if (!(CN = dyn_cast<ConstantSDNode>(Mask)) || - !IsShiftedMask(CN->getZExtValue(), SMPos, SMSize)) + !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); // Return if the shifted mask does not start at bit 0 or the sum of its size @@ -907,7 +762,7 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(SMSize, MVT::i32)); } -static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { // Pattern match INS. @@ -926,7 +781,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); if (!(CN = dyn_cast<ConstantSDNode>(And0.getOperand(1))) || - !IsShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) + !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1). @@ -934,7 +789,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) || - !IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) + !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); // The shift masks must have the same position and size. 
@@ -961,7 +816,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG &DAG, DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0)); } -static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG, +static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) @@ -991,25 +846,25 @@ static SDValue PerformADDCombine(SDNode *N, SelectionDAG &DAG, SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; - unsigned opc = N->getOpcode(); + unsigned Opc = N->getOpcode(); - switch (opc) { + switch (Opc) { default: break; case ISD::ADDE: - return PerformADDECombine(N, DAG, DCI, Subtarget); + return performADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: - return PerformSUBECombine(N, DAG, DCI, Subtarget); + return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::SDIVREM: case ISD::UDIVREM: - return PerformDivRemCombine(N, DAG, DCI, Subtarget); + return performDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: - return PerformSELECTCombine(N, DAG, DCI, Subtarget); + return performSELECTCombine(N, DAG, DCI, Subtarget); case ISD::AND: - return PerformANDCombine(N, DAG, DCI, Subtarget); + return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: - return PerformORCombine(N, DAG, DCI, Subtarget); + return performORCombine(N, DAG, DCI, Subtarget); case ISD::ADD: - return PerformADDCombine(N, DAG, DCI, Subtarget); + return performADDCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -1040,31 +895,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); - case ISD::FABS: return LowerFABS(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG); - case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG); - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, true); - case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); - case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); - case ISD::ADD: return LowerADD(Op, DAG); + case ISD::BR_JT: return lowerBR_JT(Op, DAG); + case ISD::BRCOND: return lowerBRCOND(Op, DAG); + case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); + case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return lowerJumpTable(Op, DAG); + 
case ISD::SELECT: return lowerSELECT(Op, DAG); + case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::VASTART: return lowerVASTART(Op, DAG); + case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); + case ISD::FABS: return lowerFABS(Op, DAG); + case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); + case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); + case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); + case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); + case ISD::LOAD: return lowerLOAD(Op, DAG); + case ISD::STORE: return lowerSTORE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::ADD: return lowerADD(Op, DAG); } return SDValue(); } @@ -1073,11 +929,11 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const // Lower helper functions //===----------------------------------------------------------------------===// -// AddLiveIn - This helper function adds the specified physical register to the +// addLiveIn - This helper function adds the specified physical register to the // MachineFunction as a live in value. It also creates a corresponding // virtual register for it. static unsigned -AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) +addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); @@ -1085,7 +941,7 @@ AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) } // Get fp branch code (not opcode) from condition code. -static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { +static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return Mips::BRANCH_T; @@ -1095,425 +951,6 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { return Mips::BRANCH_F; } -/* -static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB, - DebugLoc dl, - const MipsSubtarget *Subtarget, - const TargetInstrInfo *TII, - bool isFPCmp, unsigned Opc) { - // There is no need to expand CMov instructions if target has - // conditional moves. - if (Subtarget->hasCondMov()) - return BB; - - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. 
- sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - // Emit the right instruction according to the type of the operands compared - if (isFPCmp) - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - else - BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg()) - .addReg(Mips::ZERO).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - if (isFPCmp) - BuildMI(*BB, BB->begin(), dl, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); - else - BuildMI(*BB, BB->begin(), dl, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -*/ - -MachineBasicBlock * -MipsTargetLowering::EmitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ - // $bb: - // bposge32_pseudo $vr0 - // => - // $bb: - // bposge32 $tbb - // $fbb: - // li $vr2, 0 - // b $sink - // $tbb: - // li $vr1, 1 - // $sink: - // $vr0 = phi($vr2, $fbb, $vr1, $tbb) - - MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetRegisterClass *RC = &Mips::CPURegsRegClass; - DebugLoc DL = MI->getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); - MachineFunction *F = BB->getParent(); - MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, FBB); - F->insert(It, TBB); - F->insert(It, Sink); - - // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - Sink->transferSuccessorsAndUpdatePHIs(BB); - - // Add successors. - BB->addSuccessor(FBB); - BB->addSuccessor(TBB); - FBB->addSuccessor(Sink); - TBB->addSuccessor(Sink); - - // Insert the real bposge32 instruction to $BB. - BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); - - // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) - .addReg(Mips::ZERO).addImm(0); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); - - // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); - BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) - .addReg(Mips::ZERO).addImm(1); - - // Insert phi function to $Sink. - BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), - MI->getOperand(0).getReg()) - .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return Sink; -} - -MachineBasicBlock *MipsTargetLowering::EmitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) - .addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), dl, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - -MachineBasicBlock *MipsTargetLowering::EmitSelT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. 
- BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addReg(MI->getOperand(4).getReg()); - BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), dl, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock *MipsTargetLowering::EmitSeliT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, dl, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addImm(MI->getOperand(4).getImm()); - BuildMI(BB, dl, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), dl, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock - *MipsTargetLowering::EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - unsigned regY = MI->getOperand(1).getReg(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return BB; -} - - -MachineBasicBlock *MipsTargetLowering::EmitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - int64_t imm = MI->getOperand(1).getImm(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -static unsigned Mips16WhichOp8uOr16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - unsigned regY = MI->getOperand(2).getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -MachineBasicBlock *MipsTargetLowering::EmitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - int64_t Imm = MI->getOperand(2).getImm(); - unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. 
- return BB; - -} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1522,185 +959,114 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: case Mips::ATOMIC_LOAD_ADD_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); + return emitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I16: case Mips::ATOMIC_LOAD_ADD_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); + return emitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I32: case Mips::ATOMIC_LOAD_ADD_I32_P8: - return EmitAtomicBinary(MI, BB, 4, Mips::ADDu); + return emitAtomicBinary(MI, BB, 4, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I64: case Mips::ATOMIC_LOAD_ADD_I64_P8: - return EmitAtomicBinary(MI, BB, 8, Mips::DADDu); + return emitAtomicBinary(MI, BB, 8, Mips::DADDu); case Mips::ATOMIC_LOAD_AND_I8: case Mips::ATOMIC_LOAD_AND_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND); + return emitAtomicBinaryPartword(MI, BB, 1, Mips::AND); case Mips::ATOMIC_LOAD_AND_I16: case Mips::ATOMIC_LOAD_AND_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND); + return emitAtomicBinaryPartword(MI, BB, 2, Mips::AND); case Mips::ATOMIC_LOAD_AND_I32: case Mips::ATOMIC_LOAD_AND_I32_P8: - return EmitAtomicBinary(MI, BB, 4, Mips::AND); + return emitAtomicBinary(MI, BB, 4, Mips::AND); case Mips::ATOMIC_LOAD_AND_I64: case Mips::ATOMIC_LOAD_AND_I64_P8: - return EmitAtomicBinary(MI, BB, 8, Mips::AND64); + return emitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: case Mips::ATOMIC_LOAD_OR_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR); + return emitAtomicBinaryPartword(MI, BB, 1, Mips::OR); case Mips::ATOMIC_LOAD_OR_I16: case Mips::ATOMIC_LOAD_OR_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR); + return emitAtomicBinaryPartword(MI, BB, 2, Mips::OR); case Mips::ATOMIC_LOAD_OR_I32: case Mips::ATOMIC_LOAD_OR_I32_P8: - return EmitAtomicBinary(MI, BB, 4, Mips::OR); + return emitAtomicBinary(MI, BB, 4, Mips::OR); case Mips::ATOMIC_LOAD_OR_I64: case Mips::ATOMIC_LOAD_OR_I64_P8: - return EmitAtomicBinary(MI, BB, 8, Mips::OR64); + return emitAtomicBinary(MI, BB, 8, Mips::OR64); case Mips::ATOMIC_LOAD_XOR_I8: case Mips::ATOMIC_LOAD_XOR_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); + return emitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I16: case Mips::ATOMIC_LOAD_XOR_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); + return emitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I32: case Mips::ATOMIC_LOAD_XOR_I32_P8: - return EmitAtomicBinary(MI, BB, 4, Mips::XOR); + return emitAtomicBinary(MI, BB, 4, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I64: case Mips::ATOMIC_LOAD_XOR_I64_P8: - return EmitAtomicBinary(MI, BB, 8, Mips::XOR64); + return emitAtomicBinary(MI, BB, 8, Mips::XOR64); case Mips::ATOMIC_LOAD_NAND_I8: case Mips::ATOMIC_LOAD_NAND_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, 0, true); + return emitAtomicBinaryPartword(MI, BB, 1, 0, true); case Mips::ATOMIC_LOAD_NAND_I16: case Mips::ATOMIC_LOAD_NAND_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, 0, true); + return emitAtomicBinaryPartword(MI, BB, 2, 0, true); case Mips::ATOMIC_LOAD_NAND_I32: case Mips::ATOMIC_LOAD_NAND_I32_P8: - return EmitAtomicBinary(MI, BB, 4, 0, true); + return emitAtomicBinary(MI, BB, 4, 
0, true); case Mips::ATOMIC_LOAD_NAND_I64: case Mips::ATOMIC_LOAD_NAND_I64_P8: - return EmitAtomicBinary(MI, BB, 8, 0, true); + return emitAtomicBinary(MI, BB, 8, 0, true); case Mips::ATOMIC_LOAD_SUB_I8: case Mips::ATOMIC_LOAD_SUB_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); + return emitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I16: case Mips::ATOMIC_LOAD_SUB_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); + return emitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I32: case Mips::ATOMIC_LOAD_SUB_I32_P8: - return EmitAtomicBinary(MI, BB, 4, Mips::SUBu); + return emitAtomicBinary(MI, BB, 4, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I64: case Mips::ATOMIC_LOAD_SUB_I64_P8: - return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu); + return emitAtomicBinary(MI, BB, 8, Mips::DSUBu); case Mips::ATOMIC_SWAP_I8: case Mips::ATOMIC_SWAP_I8_P8: - return EmitAtomicBinaryPartword(MI, BB, 1, 0); + return emitAtomicBinaryPartword(MI, BB, 1, 0); case Mips::ATOMIC_SWAP_I16: case Mips::ATOMIC_SWAP_I16_P8: - return EmitAtomicBinaryPartword(MI, BB, 2, 0); + return emitAtomicBinaryPartword(MI, BB, 2, 0); case Mips::ATOMIC_SWAP_I32: case Mips::ATOMIC_SWAP_I32_P8: - return EmitAtomicBinary(MI, BB, 4, 0); + return emitAtomicBinary(MI, BB, 4, 0); case Mips::ATOMIC_SWAP_I64: case Mips::ATOMIC_SWAP_I64_P8: - return EmitAtomicBinary(MI, BB, 8, 0); + return emitAtomicBinary(MI, BB, 8, 0); case Mips::ATOMIC_CMP_SWAP_I8: case Mips::ATOMIC_CMP_SWAP_I8_P8: - return EmitAtomicCmpSwapPartword(MI, BB, 1); + return emitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: case Mips::ATOMIC_CMP_SWAP_I16_P8: - return EmitAtomicCmpSwapPartword(MI, BB, 2); + return emitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: case Mips::ATOMIC_CMP_SWAP_I32_P8: - return EmitAtomicCmpSwap(MI, BB, 4); + return emitAtomicCmpSwap(MI, BB, 4); case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: - return EmitAtomicCmpSwap(MI, BB, 8); - case Mips::BPOSGE32_PSEUDO: - return EmitBPOSGE32(MI, BB); - case Mips::SelBeqZ: - return EmitSel16(Mips::BeqzRxImm16, MI, BB); - case Mips::SelBneZ: - return EmitSel16(Mips::BnezRxImm16, MI, BB); - case Mips::SelTBteqZCmpi: - return EmitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBteqZSlti: - return EmitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBteqZSltiu: - return EmitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBtneZCmpi: - return EmitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBtneZSlti: - return EmitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBtneZSltiu: - return EmitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBteqZCmp: - return EmitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBteqZSlt: - return EmitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBteqZSltu: - return EmitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::SelTBtneZCmp: - return EmitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBtneZSlt: - return EmitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBtneZSltu: - return EmitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpX16: - return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::BteqzT8SltX16: - return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case 
Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return EmitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::BtnezT8CmpX16: - return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::BtnezT8SltX16: - return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return EmitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpiX16: return EmitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BteqzT8SltiX16: return EmitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BteqzT8SltiuX16: return EmitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::BtnezT8CmpiX16: return EmitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BtnezT8SltiX16: return EmitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BtnezT8SltiuX16: return EmitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - break; - case Mips::SltCCRxRy16: - return EmitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); - break; - case Mips::SltiCCRxImmX16: - return EmitFEXT_CCRXI16_ins - (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::SltiuCCRxImmX16: - return EmitFEXT_CCRXI16_ins - (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SltuCCRxRy16: - return EmitFEXT_CCRX16_ins - (Mips::SltuRxRy16, MI, BB); + return emitAtomicCmpSwap(MI, BB, 8); } } // This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and // Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true) MachineBasicBlock * -MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, +MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary."); @@ -1709,7 +1075,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); unsigned LL, SC, AND, NOR, ZERO, BEQ; if (Size == 4) { @@ -1765,20 +1131,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // sc success, storeval, 0(ptr) // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0); + BuildMI(BB, DL, TII->get(LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { // and andres, oldval, incr // nor storeval, $0, andres - BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); - BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); + BuildMI(BB, DL, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, DL, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); } else if (BinOpcode) { // <binop> storeval, oldval, incr - BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); + BuildMI(BB, DL, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { StoreVal = Incr; } - BuildMI(BB, dl, TII->get(SC), 
Success).addReg(StoreVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); + BuildMI(BB, DL, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0); + BuildMI(BB, DL, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. @@ -1786,7 +1152,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, } MachineBasicBlock * -MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, +MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { @@ -1797,7 +1163,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC; @@ -1856,18 +1222,18 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2) .addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); - BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(ShiftAmt).addReg(MaskUpper); - BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); + BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); + BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr); // atomic.load.binop // loopMBB: @@ -1889,32 +1255,32 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // beq success,$0,loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { // and andres, oldval, incr2 // nor binopres, $0, andres // and newval, binopres, mask - BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); - BuildMI(BB, dl, TII->get(Mips::NOR), BinOpRes) + BuildMI(BB, DL, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, DL, TII->get(Mips::NOR), BinOpRes) .addReg(Mips::ZERO).addReg(AndRes); - BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); + BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else if (BinOpcode) { // <binop> binopres, oldval, incr2 // and newval, binopres, mask - BuildMI(BB, dl, TII->get(BinOpcode), BinOpRes).addReg(OldVal).addReg(Incr2); - BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); + BuildMI(BB, DL, TII->get(BinOpcode), 
BinOpRes).addReg(OldVal).addReg(Incr2); + BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(BinOpRes).addReg(Mask); } else {// atomic.swap // and newval, incr2, mask - BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); + BuildMI(BB, DL, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask); } - BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal0).addReg(NewVal); - BuildMI(BB, dl, TII->get(SC), Success) + BuildMI(BB, DL, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) + BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); // sinkMBB: @@ -1925,13 +1291,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(ShiftAmt).addReg(MaskedOldVal1); - BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) .addReg(SrlRes).addImm(ShiftImm); - BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + BuildMI(BB, DL, TII->get(Mips::SRA), Dest) .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. @@ -1940,7 +1306,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, } MachineBasicBlock * -MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, +MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); @@ -1949,7 +1315,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); unsigned LL, SC, ZERO, BNE, BEQ; if (Size == 4) { @@ -2003,17 +1369,17 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ll dest, 0(ptr) // bne dest, oldval, exitMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(BNE)) + BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(BB, DL, TII->get(BNE)) .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: // sc success, newval, 0(ptr) // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(SC), Success) + BuildMI(BB, DL, TII->get(SC), Success) .addReg(NewVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(BEQ)) + BuildMI(BB, DL, TII->get(BEQ)) .addReg(Success).addReg(ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. 
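
The part-word atomic expansions above never access memory at sub-word granularity: they align the pointer down to a word, build a mask for the affected byte or halfword lane, apply the operation to that lane inside the word, and finally shift and arithmetic-shift the old lane back out so the result comes back sign-extended. The C++ model below is a simplified, non-atomic sketch of that mask/shift/merge sequence; the function name is made up, little-endian lane numbering is assumed, and the LL/SC retry loop plus the machine-basic-block plumbing from the diff are deliberately left out.

#include <cassert>
#include <cstdint>

// Replace the byte at byte-offset Off inside Word and return the old byte
// sign-extended to 32 bits, using only word-sized reads and writes.
static int32_t swapByteInWord(uint32_t &Word, unsigned Off, uint8_t NewVal) {
  unsigned ShiftAmt = (Off & 3) * 8;              // sll shiftamt, ptrlsb2, 3
  uint32_t Mask = 0xFFu << ShiftAmt;              // sllv mask, shiftamt, maskupper
  uint32_t Mask2 = ~Mask;                         // nor mask2, $0, mask
  uint32_t Incr2 = uint32_t(NewVal) << ShiftAmt;  // sll incr2, incr, shiftamt

  uint32_t OldVal = Word;                         // ll oldval, 0(alignedaddr)
  Word = (OldVal & Mask2) | Incr2;                // and/or storeval; sc

  // Sink block: isolate the old lane, then sign-extend it (srlv, sll, sra).
  uint32_t MaskedOld = (OldVal & Mask) >> ShiftAmt;
  return int32_t(int8_t(MaskedOld));
}

int main() {
  uint32_t W = 0x11223344;                  // lanes 0..3 hold 44 33 22 11
  int32_t Old = swapByteInWord(W, 1, 0xAB);
  assert(Old == 0x33 && W == 0x1122AB44);
  Old = swapByteInWord(W, 3, 0x01);
  assert(Old == 0x11 && W == 0x0122AB44);
  return 0;
}

In the real expansion the store is an SC whose failure branches back to the LL, and the compare-and-swap variants add a second loop block that bails out to the sink block when the masked old lane does not match the shifted compare value, which is what the loop1MBB/loop2MBB hunks below this point implement.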
@@ -2022,7 +1388,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, } MachineBasicBlock * -MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, +MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && @@ -2032,7 +1398,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC; @@ -2099,24 +1465,24 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // andi maskednewval,newval,255 // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; - BuildMI(BB, dl, TII->get(Mips::ADDiu), MaskLSB2) + BuildMI(BB, DL, TII->get(Mips::ADDiu), MaskLSB2) .addReg(Mips::ZERO).addImm(-4); - BuildMI(BB, dl, TII->get(Mips::AND), AlignedAddr) + BuildMI(BB, DL, TII->get(Mips::AND), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); - BuildMI(BB, dl, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); - BuildMI(BB, dl, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); - BuildMI(BB, dl, TII->get(Mips::ORi), MaskUpper) + BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2).addReg(Ptr).addImm(3); + BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); + BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLLV), Mask) + BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(ShiftAmt).addReg(MaskUpper); - BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedCmpVal) + BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); + BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal) .addReg(CmpVal).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedCmpVal) + BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal) .addReg(ShiftAmt).addReg(MaskedCmpVal); - BuildMI(BB, dl, TII->get(Mips::ANDi), MaskedNewVal) + BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal) .addReg(NewVal).addImm(MaskImm); - BuildMI(BB, dl, TII->get(Mips::SLLV), ShiftedNewVal) + BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(ShiftAmt).addReg(MaskedNewVal); // loop1MBB: @@ -2124,10 +1490,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // and maskedoldval0,oldval,mask // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) + BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask); - BuildMI(BB, dl, TII->get(Mips::BNE)) + BuildMI(BB, DL, TII->get(Mips::BNE)) .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB); // loop2MBB: @@ -2136,13 +1502,13 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // sc success,storeval,0(alignedaddr) // beq success,$0,loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal1) + BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask2); - BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) + BuildMI(BB, DL, TII->get(Mips::OR), StoreVal) 
.addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, dl, TII->get(SC), Success) + BuildMI(BB, DL, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) + BuildMI(BB, DL, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); // sinkMBB: @@ -2152,11 +1518,11 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, BB = sinkMBB; int64_t ShiftImm = (Size == 1) ? 24 : 16; - BuildMI(BB, dl, TII->get(Mips::SRLV), SrlRes) + BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(ShiftAmt).addReg(MaskedOldVal0); - BuildMI(BB, dl, TII->get(Mips::SLL), SllRes) + BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) .addReg(SrlRes).addImm(ShiftImm); - BuildMI(BB, dl, TII->get(Mips::SRA), Dest) + BuildMI(BB, DL, TII->get(Mips::SRA), Dest) .addReg(SllRes).addImm(ShiftImm); MI->eraseFromParent(); // The instruction is gone now. @@ -2167,16 +1533,46 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, //===----------------------------------------------------------------------===// // Misc Lower Operation implementation //===----------------------------------------------------------------------===// +SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc DL = Op.getDebugLoc(); + EVT PTy = getPointerTy(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout()); + + Index = DAG.getNode(ISD::MUL, DL, PTy, Index, + DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, false, false, + 0); + Chain = Addr.getValue(1); + + if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || IsN64) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr, + getPICJumpTableRelocBase(Table, DAG)); + } + + return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr); +} + SDValue MipsTargetLowering:: -LowerBRCOND(SDValue Op, SelectionDAG &DAG) const +lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. SDValue Chain = Op.getOperand(0); SDValue Dest = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); - SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1)); + SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); // Return if flag is not set by a floating point comparison. 
if (CondRes.getOpcode() != MipsISD::FPCmp) @@ -2185,27 +1581,27 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); - SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); + SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32); - return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, + return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, Dest, CondRes); } SDValue MipsTargetLowering:: -LowerSELECT(SDValue Op, SelectionDAG &DAG) const +lowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0)); + SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); // Return if flag is not set by a floating point comparison. if (Cond.getOpcode() != MipsISD::FPCmp) return Op; - return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), + return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), Op.getDebugLoc()); } SDValue MipsTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const +lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getOperand(0).getValueType(); @@ -2217,8 +1613,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const Op.getOperand(3)); } -SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = CreateFPCmp(DAG, Op); +SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = createFPCmp(DAG, Op); assert(Cond.getOpcode() == MipsISD::FPCmp && "Floating point operand expected."); @@ -2226,13 +1622,13 @@ SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue True = DAG.getConstant(1, MVT::i32); SDValue False = DAG.getConstant(0, MVT::i32); - return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc()); + return createCMovFP(DAG, Cond, True, False, Op.getDebugLoc()); } -SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, +SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) { @@ -2241,12 +1637,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, // %gp_rel relocation if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0, MipsII::MO_GPREL); - SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, + SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL, DAG.getVTList(MVT::i32), &GA, 1); SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32); - return DAG.getNode(ISD::ADD, dl, MVT::i32, GPReg, GPRelNode); + return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode); } // %hi/%lo relocation @@ -2264,7 +1660,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, HasMips64 ? 
MipsII::MO_GOT_DISP : MipsII::MO_GOT16); } -SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, +SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) return getAddrNonPIC(Op, DAG); @@ -2273,14 +1669,14 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, } SDValue MipsTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const +lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // If the relocation model is PIC, use the General Dynamic TLS Model or // Local Dynamic TLS model, otherwise use the Initial Exec or // Local Exec TLS Model. GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - DebugLoc dl = GA->getDebugLoc(); + DebugLoc DL = GA->getDebugLoc(); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); @@ -2291,9 +1687,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD; - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag); - SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, - GetGlobalReg(DAG, PtrVT), TGA); + SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag); + SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, + getGlobalReg(DAG, PtrVT), TGA); unsigned PtrSize = PtrVT.getSizeInBits(); IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); @@ -2307,9 +1703,9 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy, false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*doesNotRet=*/false, + /*IsTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - TlsGetAddr, Args, DAG, dl); + TlsGetAddr, Args, DAG, DL); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue Ret = CallResult.first; @@ -2317,44 +1713,44 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const if (model != TLSModel::LocalDynamic) return Ret; - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_HI); - SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_LO); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret); - return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo); + SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); + SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret); + return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo); } SDValue Offset; if (model == TLSModel::InitialExec) { // Initial Exec TLS Model - SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_GOTTPREL); - TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT), + TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT), TGA); - Offset = DAG.getLoad(PtrVT, dl, + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), TGA, MachinePointerInfo(), false, false, false, 0); } else { // Local Exec TLS Model assert(model == TLSModel::LocalExec); - SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 
0, MipsII::MO_TPREL_HI); - SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_TPREL_LO); - SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo); - Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo); + SDValue Hi = DAG.getNode(MipsISD::Hi, DL, PtrVT, TGAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } - SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT); - return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); + SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); + return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); } SDValue MipsTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) const +lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) return getAddrNonPIC(Op, DAG); @@ -2363,7 +1759,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const } SDValue MipsTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) const +lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { // gp_rel relocation // FIXME: we should reference the constant pool using small data sections, @@ -2381,22 +1777,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const return getAddrLocal(Op, DAG, HasMips64); } -SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { +SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>(); - DebugLoc dl = Op.getDebugLoc(); + DebugLoc DL = Op.getDebugLoc(); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. 
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } -static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) { +static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) { EVT TyX = Op.getOperand(0).getValueType(); EVT TyY = Op.getOperand(1).getValueType(); SDValue Const1 = DAG.getConstant(1, MVT::i32); @@ -2441,7 +1837,7 @@ static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) { return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); } -static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) { +static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) { unsigned WidthX = Op.getOperand(0).getValueSizeInBits(); unsigned WidthY = Op.getOperand(1).getValueSizeInBits(); EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY); @@ -2490,14 +1886,14 @@ static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) { } SDValue -MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { +MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->hasMips64()) - return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2()); + return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2()); - return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2()); + return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2()); } -static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) { +static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) { SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); DebugLoc DL = Op.getDebugLoc(); @@ -2526,7 +1922,7 @@ static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) { return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); } -static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) { +static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) { SDValue Res, Const1 = DAG.getConstant(1, MVT::i32); DebugLoc DL = Op.getDebugLoc(); @@ -2547,15 +1943,15 @@ static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) { } SDValue -MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const { +MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64)) - return LowerFABS64(Op, DAG, Subtarget->hasMips32r2()); + return lowerFABS64(Op, DAG, Subtarget->hasMips32r2()); - return LowerFABS32(Op, DAG, Subtarget->hasMips32r2()); + return lowerFABS32(Op, DAG, Subtarget->hasMips32r2()); } SDValue MipsTargetLowering:: -LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { +lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) && "Frame address can only be determined for current frame."); @@ -2563,13 +1959,13 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + DebugLoc DL = Op.getDebugLoc(); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, IsN64 ? 
Mips::FP_64 : Mips::FP, VT); return FrameAddr; } -SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op, +SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) && @@ -2590,7 +1986,7 @@ SDValue MipsTargetLowering::LowerRETURNADDR(SDValue Op, // generated from __builtin_eh_return (offset, handler) // The effect of this is to adjust the stack pointer by "offset" // and then branch to "handler". -SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) +SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); @@ -2616,24 +2012,24 @@ SDValue MipsTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) // TODO: set SType according to the desired memory barrier behavior. SDValue -MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { +MipsTargetLowering::lowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const { unsigned SType = 0; - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(SType, MVT::i32)); } -SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, +SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. unsigned SType = 0; - DebugLoc dl = Op.getDebugLoc(); - return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0), + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(SType, MVT::i32)); } -SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, +SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); @@ -2664,7 +2060,7 @@ SDValue MipsTargetLowering::LowerShiftLeftParts(SDValue Op, return DAG.getMergeValues(Ops, 2, DL); } -SDValue MipsTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, +SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const { DebugLoc DL = Op.getDebugLoc(); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); @@ -2723,7 +2119,7 @@ static SDValue CreateLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, } // Expand an unaligned 32 or 64-bit integer load node. -SDValue MipsTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { +SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast<LoadSDNode>(Op); EVT MemVT = LD->getMemoryVT(); @@ -2801,7 +2197,7 @@ static SDValue CreateStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, } // Expand an unaligned 32 or 64-bit integer store node. 
-SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { +SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *SD = cast<StoreSDNode>(Op); EVT MemVT = SD->getMemoryVT(); @@ -2849,7 +2245,7 @@ SDValue MipsTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v1 = copy hi // out64 = merge-values (v0, v1) // -static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG, +static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc, bool HasI64In, bool HasI64Out) { DebugLoc DL = Op.getDebugLoc(); bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; @@ -2894,95 +2290,95 @@ static SDValue LowerDSPIntr(SDValue Op, SelectionDAG &DAG, return DAG.getMergeValues(Vals, 2, DL); } -SDValue MipsTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, +SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { switch (cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue()) { default: return SDValue(); case Intrinsic::mips_shilo: - return LowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); case Intrinsic::mips_dpau_h_qbl: - return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); case Intrinsic::mips_dpau_h_qbr: - return LowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); case Intrinsic::mips_dpsu_h_qbl: - return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); case Intrinsic::mips_dpsu_h_qbr: - return LowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); case Intrinsic::mips_dpa_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); case Intrinsic::mips_dps_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); case Intrinsic::mips_dpax_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); case Intrinsic::mips_dpsx_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); case Intrinsic::mips_mulsa_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); case Intrinsic::mips_mult: - return LowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); case Intrinsic::mips_multu: - return LowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); case Intrinsic::mips_madd: - return LowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); case Intrinsic::mips_maddu: - return LowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); case Intrinsic::mips_msub: - return LowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); case Intrinsic::mips_msubu: - return LowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); } } -SDValue 
MipsTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, +SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { switch (cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue()) { default: return SDValue(); case Intrinsic::mips_extp: - return LowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); case Intrinsic::mips_extpdp: - return LowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); case Intrinsic::mips_extr_w: - return LowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); case Intrinsic::mips_extr_r_w: - return LowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); case Intrinsic::mips_extr_rs_w: - return LowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); case Intrinsic::mips_extr_s_h: - return LowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); case Intrinsic::mips_mthlip: - return LowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); case Intrinsic::mips_mulsaq_s_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); case Intrinsic::mips_maq_s_w_phl: - return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); case Intrinsic::mips_maq_s_w_phr: - return LowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); case Intrinsic::mips_maq_sa_w_phl: - return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); case Intrinsic::mips_maq_sa_w_phr: - return LowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); case Intrinsic::mips_dpaq_s_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); case Intrinsic::mips_dpsq_s_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); case Intrinsic::mips_dpaq_sa_l_w: - return LowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); case Intrinsic::mips_dpsq_sa_l_w: - return LowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); case Intrinsic::mips_dpaqx_s_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); case Intrinsic::mips_dpaqx_sa_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); case Intrinsic::mips_dpsqx_s_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); case Intrinsic::mips_dpsqx_sa_w_ph: - return LowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); } } -SDValue 
MipsTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { +SDValue MipsTargetLowering::lowerADD(SDValue Op, SelectionDAG &DAG) const { if (Op->getOperand(0).getOpcode() != ISD::FRAMEADDR || cast<ConstantSDNode> (Op->getOperand(0).getOperand(0))->getZExtValue() != 0 @@ -3119,28 +2515,6 @@ static unsigned getNextIntArgReg(unsigned Reg) { return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -/// IsEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. -bool MipsTargetLowering:: -IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { - if (!EnableMipsTailCalls) - return false; - - // No tail call optimization for mips16. - if (Subtarget->inMips16Mode()) - return false; - - // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) - return false; - - // Return true if the callee's argument area is no larger than the - // caller's. - return NextStackOffset <= FI.getIncomingArgSize(); -} - SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, DebugLoc DL, @@ -3159,161 +2533,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, /*isVolatile=*/ true, false, 0); } -// -// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much -// cleaner way to do all of this but it will have to wait until the traditional -// gcc mechanism is completed. -// -// For Pic, in order for Mips16 code to call Mips32 code which according the abi -// have either arguments or returned values placed in floating point registers, -// we use a set of helper functions. (This includes functions which return type -// complex which on Mips are returned in a pair of floating point registers). -// -// This is an encoding that we inherited from gcc. -// In Mips traditional O32, N32 ABI, floating point numbers are passed in -// floating point argument registers 1,2 only when the first and optionally -// the second arguments are float (sf) or double (df). -// For Mips16 we are only concerned with the situations where floating point -// arguments are being passed in floating point registers by the ABI, because -// Mips16 mode code cannot execute floating point instructions to load those -// values and hence helper functions are needed. -// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) -// the helper function suffixs for these are: -// 0, 1, 5, 9, 2, 6, 10 -// this suffix can then be calculated as follows: -// for a given argument Arg: -// Arg1x, Arg2x = 1 : Arg is sf -// 2 : Arg is df -// 0: Arg is neither sf or df -// So this stub is the string for number Arg1x + Arg2x*4. -// However not all numbers between 0 and 10 are possible, we check anyway and -// assert if the impossible exists. 
-// - -unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber - (ArgListTy &Args) const { - unsigned int resultNum = 0; - if (Args.size() >= 1) { - Type *t = Args[0].Ty; - if (t->isFloatTy()) { - resultNum = 1; - } - else if (t->isDoubleTy()) { - resultNum = 2; - } - } - if (resultNum) { - if (Args.size() >=2) { - Type *t = Args[1].Ty; - if (t->isFloatTy()) { - resultNum += 4; - } - else if (t->isDoubleTy()) { - resultNum += 8; - } - } +void MipsTargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // Insert node "GP copy globalreg" before call to function. + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } - return resultNum; -} -// -// prefixs are attached to stub numbers depending on the return type . -// return type: float sf_ -// double df_ -// single complex sc_ -// double complext dc_ -// others NO PREFIX -// -// -// The full name of a helper function is__mips16_call_stub + -// return type dependent prefix + stub number -// -// -// This is something that probably should be in a different source file and -// perhaps done differently but my main purpose is to not waste runtime -// on something that we can enumerate in the source. Another possibility is -// to have a python script to generate these mapping tables. This will do -// for now. There are a whole series of helper function mapping arrays, one -// for each return type class as outlined above. There there are 11 possible -// entries. Ones with 0 are ones which should never be selected -// -// All the arrays are similar except for ones which return neither -// sf, df, sc, dc, in which only care about ones which have sf or df as a -// first parameter. 
-//
-#define P_ "__mips16_call_stub_"
-#define MAX_STUB_NUMBER 10
-#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10"
-#define T P "0" , T1
-#define P P_
-static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
-#undef P
-#define P P_ "sf_"
-static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "df_"
-static char const * dfMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "sc_"
-static char const * scMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#define P P_ "dc_"
-static char const * dcMips16Helper[MAX_STUB_NUMBER+1] =
- { T };
-#undef P
-#undef P_
-
-
-const char* MipsTargetLowering::
- getMips16HelperFunction
- (Type* RetTy, ArgListTy &Args, bool &needHelper) const {
- const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args);
-#ifndef NDEBUG
- const unsigned int maxStubNum = 10;
- assert(stubNum <= maxStubNum);
- const bool validStubNum[maxStubNum+1] =
- {true, true, true, false, false, true, true, false, false, true, true};
- assert(validStubNum[stubNum]);
-#endif
- const char *result;
- if (RetTy->isFloatTy()) {
- result = sfMips16Helper[stubNum];
- }
- else if (RetTy ->isDoubleTy()) {
- result = dfMips16Helper[stubNum];
- }
- else if (RetTy->isStructTy()) {
- // check if it's complex
- if (RetTy->getNumContainedTypes() == 2) {
- if ((RetTy->getContainedType(0)->isFloatTy()) &&
- (RetTy->getContainedType(1)->isFloatTy())) {
- result = scMips16Helper[stubNum];
- }
- else if ((RetTy->getContainedType(0)->isDoubleTy()) &&
- (RetTy->getContainedType(1)->isDoubleTy())) {
- result = dcMips16Helper[stubNum];
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- llvm_unreachable("Uncovered condition");
- }
- }
- else {
- if (stubNum == 0) {
- needHelper = false;
- return "";
- }
- result = vMips16Helper[stubNum];
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+ // The InFlag is necessary since all emitted instructions must be
+ // stuck together.
+ SDValue InFlag;
+
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
 }
- needHelper = true;
- return result;
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(CLI.DAG.getRegisterMask(Mask)); + + if (InFlag.getNode()) + Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to @@ -3322,36 +2583,16 @@ SDValue MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; + DebugLoc &DL = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; + bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; - bool isVarArg = CLI.IsVarArg; - - const char* mips16HelperFunction = 0; - bool needMips16Helper = false; - - if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && - Mips16HardFloat) { - // - // currently we don't have symbols tagged with the mips16 or mips32 - // qualifier so we will assume that we don't know what kind it is. - // and generate the helper - // - bool lookupHelper = true; - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - if (noHelperNeeded.find(S->getSymbol()) != noHelperNeeded.end()) { - lookupHelper = false; - } - } - if (lookupHelper) mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, needMips16Helper); + bool IsVarArg = CLI.IsVarArg; - } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -3359,22 +2600,24 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); - MipsCCInfo.analyzeCallOperands(Outs, isVarArg); + MipsCCInfo.analyzeCallOperands(Outs, IsVarArg, + getTargetMachine().Options.UseSoftFloat, + Callee.getNode(), CLI.Args); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // Check if it's really possible to do a tail call. - if (isTailCall) - isTailCall = - IsEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset, + if (IsTailCall) + IsTailCall = + isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>()); - if (isTailCall) + if (IsTailCall) ++NumTailCalls; // Chain is the output chain of the last Load/Store or CopyToReg node. @@ -3384,10 +2627,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - if (!isTailCall) + if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, IsN64 ? 
Mips::SP_64 : Mips::SP, getPointerTy()); @@ -3408,9 +2651,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValArg != MipsCCInfo.byval_end()); - assert(!isTailCall && + assert(!IsTailCall && "Do not tail-call optimize if there is a byval argument."); - passByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, + passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, MipsCCInfo, *ByValArg, Flags, Subtarget->isLittle()); ++ByValArg; continue; @@ -3422,12 +2665,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, case CCValAssign::Full: if (VA.isRegLoc()) { if ((ValVT == MVT::f32 && LocVT == MVT::i32) || - (ValVT == MVT::f64 && LocVT == MVT::i64)) - Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg); + (ValVT == MVT::f64 && LocVT == MVT::i64) || + (ValVT == MVT::i64 && LocVT == MVT::f64)) + Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); else if (ValVT == MVT::f64 && LocVT == MVT::i32) { - SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(0, MVT::i32)); - SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(1, MVT::i32)); if (!Subtarget->isLittle()) std::swap(Lo, Hi); @@ -3440,13 +2684,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } break; case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg); + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); break; case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); break; case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); break; } @@ -3463,13 +2707,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), - Chain, Arg, dl, isTailCall, DAG)); + Chain, Arg, DL, IsTailCall, DAG)); } // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0], MemOpChains.size()); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -3491,7 +2735,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else Callee = getAddrGlobal(Callee, DAG, MipsII::MO_GOT_CALL); } else - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } @@ -3508,80 +2752,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue JumpTarget = Callee; - - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? 
Mips::T9_64 : Mips::T9; - unsigned V0Reg = Mips::V0; - if (needMips16Helper) { - RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol( - mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); - } - else { - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); - } - } - - // Insert node "GP copy globalreg" before call to function. - // - // R_MIPS_CALL* operators (emitted when non-internal functions are called - // in PIC mode) allow symbols to be resolved via lazy binding. - // The lazy binding stub requires GP to point to the GOT. - if (IsPICCall && !InternalLinkage) { - unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; - EVT Ty = IsN64 ? MVT::i64 : MVT::i32; - RegsToPass.push_back(std::make_pair(GPReg, GetGlobalReg(DAG, Ty))); - } - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... - // - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (JumpTarget.getNode()) - Ops.push_back(JumpTarget); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, + CLI, Callee, Chain); - if (InFlag.getNode()) - Ops.push_back(InFlag); + if (IsTailCall) + return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size()); - if (isTailCall) - return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size()); - - Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size()); + SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, @@ -3590,31 +2771,40 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle result values, copying them out of physregs into vregs that we // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); + return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, + Ins, DL, DAG, InVals, CLI.Callee.getNode(), CLI.RetTy); } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. 
SDValue MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, + CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals, + const SDNode *CallNode, + const Type *RetTy) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); - CCInfo.AnalyzeCallResult(Ins, RetCC_Mips); + MipsCCInfo.analyzeCallResult(Ins, getTargetMachine().Options.UseSoftFloat, + CallNode, RetTy); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { - Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), - RVLocs[i].getValVT(), InFlag).getValue(1); - InFlag = Chain.getValue(2); - InVals.push_back(Chain.getValue(0)); + SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), + RVLocs[i].getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + + if (RVLocs[i].getValVT() != RVLocs[i].getLocVT()) + Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val); + + InVals.push_back(Val); } return Chain; @@ -3628,9 +2818,9 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, + bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, + DebugLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -3644,16 +2834,17 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. 
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); + Function::const_arg_iterator FuncArg = + DAG.getMachineFunction().getFunction()->arg_begin(); + bool UseSoftFloat = getTargetMachine().Options.UseSoftFloat; - MipsCCInfo.analyzeFormalArguments(Ins); + MipsCCInfo.analyzeFormalArguments(Ins, UseSoftFloat, FuncArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), MipsCCInfo.hasByValArg()); - Function::const_arg_iterator FuncArg = - DAG.getMachineFunction().getFunction()->arg_begin(); unsigned CurArgIdx = 0; MipsCC::byval_iterator ByValArg = MipsCCInfo.byval_begin(); @@ -3669,7 +2860,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValArg != MipsCCInfo.byval_end()); - copyByValRegs(Chain, dl, OutChains, DAG, Flags, InVals, &*FuncArg, + copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, MipsCCInfo, *ByValArg); ++ByValArg; continue; @@ -3695,8 +2886,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted // to 32 bits. Insert an assert[sz]ext to capture this, then @@ -3708,22 +2899,24 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) Opcode = ISD::AssertZext; if (Opcode) - ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, + ArgValue = DAG.getNode(Opcode, DL, RegVT, ArgValue, DAG.getValueType(ValVT)); - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); + ArgValue = DAG.getNode(ISD::TRUNCATE, DL, ValVT, ArgValue); } - // Handle floating point arguments passed in integer registers. + // Handle floating point arguments passed in integer registers and + // long double arguments passed in floating point registers. 
if ((RegVT == MVT::i32 && ValVT == MVT::f32) || - (RegVT == MVT::i64 && ValVT == MVT::f64)) - ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue); + (RegVT == MVT::i64 && ValVT == MVT::f64) || + (RegVT == MVT::f64 && ValVT == MVT::i64)) + ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) { - unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), + unsigned Reg2 = addLiveIn(DAG.getMachineFunction(), getNextIntArgReg(ArgReg), RC); - SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); + SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT); if (!Subtarget->isLittle()) std::swap(ArgValue, ArgValue2); - ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, + ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, ArgValue, ArgValue2); } @@ -3739,7 +2932,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, + InVals.push_back(DAG.getLoad(ValVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); } @@ -3755,18 +2948,18 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32)); MipsFI->setSRetReturnReg(Reg); } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); } - if (isVarArg) - writeVarArgRegs(OutChains, MipsCCInfo, Chain, dl, DAG); + if (IsVarArg) + writeVarArgRegs(OutChains, MipsCCInfo, Chain, DL, DAG); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &OutChains[0], OutChains.size()); } @@ -3779,42 +2972,48 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, bool MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, + MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_Mips); } SDValue MipsTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, + CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - + DebugLoc DL, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector<CCValAssign, 16> RVLocs; + MachineFunction &MF = DAG.getMachineFunction(); // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs, + *DAG.getContext()); + MipsCC MipsCCInfo(CallConv, IsO32, CCInfo); - // Analize return values. 
- CCInfo.AnalyzeReturn(Outs, RetCC_Mips); + // Analyze return values. + MipsCCInfo.analyzeReturn(Outs, getTargetMachine().Options.UseSoftFloat, + MF.getFunction()->getReturnType()); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { + SDValue Val = OutVals[i]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); + if (RVLocs[i].getValVT() != RVLocs[i].getLocVT()) + Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); @@ -3825,17 +3024,16 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // the sret argument into $v0 for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out // and into $v0. - if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { - MachineFunction &MF = DAG.getMachineFunction(); + if (MF.getFunction()->hasStructRetAttr()) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); unsigned V0 = IsN64 ? Mips::V0_64 : Mips::V0; - Chain = DAG.getCopyToReg(Chain, dl, V0, Val, Flag); + Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(V0, getPointerTy())); } @@ -3847,7 +3045,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(Flag); // Return on Mips is always a "jr $ra" - return DAG.getNode(MipsISD::Ret, dl, MVT::Other, &RetOps[0], RetOps.size()); + return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size()); } //===----------------------------------------------------------------------===// @@ -3880,6 +3078,8 @@ getConstraintType(const std::string &Constraint) const case 'l': case 'x': return C_RegisterClass; + case 'R': + return C_Memory; } } return TargetLowering::getConstraintType(Constraint); @@ -3928,6 +3128,9 @@ MipsTargetLowering::getSingleConstraintMatchWeight( if (isa<ConstantInt>(CallOperandVal)) weight = CW_Constant; break; + case 'R': + weight = CW_Memory; + break; } return weight; } @@ -4128,6 +3331,46 @@ unsigned MipsTargetLowering::getJumpTableEncoding() const { return TargetLowering::getJumpTableEncoding(); } +/// This function returns true if CallSym is a long double emulation routine. +static bool isF128SoftLibCall(const char *CallSym) { + const char *const LibCalls[] = + {"__addtf3", "__divtf3", "__eqtf2", "__extenddftf2", "__extendsftf2", + "__fixtfdi", "__fixtfsi", "__fixtfti", "__fixunstfdi", "__fixunstfsi", + "__fixunstfti", "__floatditf", "__floatsitf", "__floattitf", + "__floatunditf", "__floatunsitf", "__floatuntitf", "__getf2", "__gttf2", + "__letf2", "__lttf2", "__multf3", "__netf2", "__powitf2", "__subtf3", + "__trunctfdf2", "__trunctfsf2", "__unordtf2", + "ceill", "copysignl", "cosl", "exp2l", "expl", "floorl", "fmal", "fmodl", + "log10l", "log2l", "logl", "nearbyintl", "powl", "rintl", "sinl", "sqrtl", + "truncl"}; + + const char * const *End = LibCalls + array_lengthof(LibCalls); + + // Check that LibCalls is sorted alphabetically. 
+ MipsTargetLowering::LTStr Comp; + +#ifndef NDEBUG + for (const char * const *I = LibCalls; I < End - 1; ++I) + assert(Comp(*I, *(I + 1))); +#endif + + return std::binary_search(LibCalls, End, CallSym, Comp); +} + +/// This function returns true if Ty is fp128 or i128 which was originally a +/// fp128. +static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) { + if (Ty->isFP128Ty()) + return true; + + const ExternalSymbolSDNode *ES = + dyn_cast_or_null<const ExternalSymbolSDNode>(CallNode); + + // If the Ty is i128 and the function being called is a long double emulation + // routine, then the original type is f128. + return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol())); +} + MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_, CCState &Info) : CCInfo(Info), CallConv(CC), IsO32(IsO32_) { @@ -4137,7 +3380,8 @@ MipsTargetLowering::MipsCC::MipsCC(CallingConv::ID CC, bool IsO32_, void MipsTargetLowering::MipsCC:: analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args, - bool IsVarArg) { + bool IsVarArg, bool IsSoftFloat, const SDNode *CallNode, + std::vector<ArgListEntry> &FuncArgs) { assert((CallConv != CallingConv::Fast || !IsVarArg) && "CallingConv::Fast shouldn't be used for vararg functions."); @@ -4156,8 +3400,11 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args, if (IsVarArg && !Args[I].IsFixed) R = VarFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - else - R = FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else { + MVT RegVT = getRegVT(ArgVT, FuncArgs[Args[I].OrigArgIndex].Ty, CallNode, + IsSoftFloat); + R = FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo); + } if (R) { #ifndef NDEBUG @@ -4170,20 +3417,26 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args, } void MipsTargetLowering::MipsCC:: -analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) { +analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args, + bool IsSoftFloat, Function::const_arg_iterator FuncArg) { unsigned NumArgs = Args.size(); llvm::CCAssignFn *FixedFn = fixedArgFn(); + unsigned CurArgIdx = 0; for (unsigned I = 0; I != NumArgs; ++I) { MVT ArgVT = Args[I].VT; ISD::ArgFlagsTy ArgFlags = Args[I].Flags; + std::advance(FuncArg, Args[I].OrigArgIndex - CurArgIdx); + CurArgIdx = Args[I].OrigArgIndex; if (ArgFlags.isByVal()) { handleByValArg(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags); continue; } - if (!FixedFn(I, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo)) + MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat); + + if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo)) continue; #ifndef NDEBUG @@ -4194,6 +3447,44 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args) { } } +template<typename Ty> +void MipsTargetLowering::MipsCC:: +analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat, + const SDNode *CallNode, const Type *RetTy) const { + CCAssignFn *Fn; + + if (IsSoftFloat && originalTypeIsF128(RetTy, CallNode)) + Fn = RetCC_F128Soft; + else + Fn = RetCC_Mips; + + for (unsigned I = 0, E = RetVals.size(); I < E; ++I) { + MVT VT = RetVals[I].VT; + ISD::ArgFlagsTy Flags = RetVals[I].Flags; + MVT RegVT = this->getRegVT(VT, RetTy, CallNode, IsSoftFloat); + + if (Fn(I, VT, RegVT, CCValAssign::Full, Flags, this->CCInfo)) { +#ifndef NDEBUG + dbgs() << "Call result #" << I << " has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(0); + } + } +} + +void MipsTargetLowering::MipsCC:: 
+analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat, + const SDNode *CallNode, const Type *RetTy) const { + analyzeReturn(Ins, IsSoftFloat, CallNode, RetTy); +} + +void MipsTargetLowering::MipsCC:: +analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat, + const Type *RetTy) const { + analyzeReturn(Outs, IsSoftFloat, 0, RetTy); +} + void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT, MVT LocVT, @@ -4268,6 +3559,21 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal, CCInfo.AllocateReg(IntArgRegs[I], ShadowRegs[I]); } +MVT MipsTargetLowering::MipsCC::getRegVT(MVT VT, const Type *OrigTy, + const SDNode *CallNode, + bool IsSoftFloat) const { + if (IsSoftFloat || IsO32) + return VT; + + // Check if the original type was fp128. + if (originalTypeIsF128(OrigTy, CallNode)) { + assert(VT == MVT::i64); + return MVT::f64; + } + + return VT; +} + void MipsTargetLowering:: copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, @@ -4300,7 +3606,7 @@ copyByValRegs(SDValue Chain, DebugLoc DL, std::vector<SDValue> &OutChains, for (unsigned I = 0; I < ByVal.NumRegs; ++I) { unsigned ArgReg = CC.intArgRegs()[ByVal.FirstIdx + I]; - unsigned VReg = AddLiveIn(MF, ArgReg, RC); + unsigned VReg = addLiveIn(MF, ArgReg, RC); unsigned Offset = I * CC.regSize(); SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, DAG.getConstant(Offset, PtrTy)); @@ -4442,7 +3748,7 @@ MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, // in the caller's stack frame, while for N32/64, it is allocated in the // callee's stack frame. for (unsigned I = Idx; I < NumRegs; ++I, VaArgOffset += RegSize) { - unsigned Reg = AddLiveIn(MF, ArgRegs[I], RC); + unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI->CreateFixedObject(RegSize, VaArgOffset, true); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index f0f3782..71977d7 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -19,6 +19,7 @@ #include "MipsSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetLowering.h" #include <deque> #include <string> @@ -151,9 +152,9 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + static const MipsTargetLowering *create(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -176,17 +177,34 @@ namespace llvm { EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - private: - void SetMips16LibcallName(RTLIB::Libcall, const char *Name); + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; + + protected: + SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - void setMips16HardFloatLibCalls(); + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; 
- unsigned int - getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; - const char *getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, unsigned LoFlag) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; /// ByValArgInfo - Byval argument information. struct ByValArgInfo { @@ -204,8 +222,20 @@ namespace llvm { MipsCC(CallingConv::ID CallConv, bool IsO32, CCState &Info); void analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, - bool IsVarArg); - void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins); + bool IsVarArg, bool IsSoftFloat, + const SDNode *CallNode, + std::vector<ArgListEntry> &FuncArgs); + void analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + bool IsSoftFloat, + Function::const_arg_iterator FuncArg); + + void analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + bool IsSoftFloat, const SDNode *CallNode, + const Type *RetTy) const; + + void analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + bool IsSoftFloat, const Type *RetTy) const; + const CCState &getCCInfo() const { return CCInfo; } /// hasByValArg - Returns true if function has byval arguments. @@ -248,6 +278,17 @@ namespace llvm { void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize, unsigned Align); + /// Return the type of the register which is used to pass an argument or + /// return a value. This function returns f64 if the argument is an i64 + /// value which has been generated as a result of softening an f128 value. + /// Otherwise, it just returns VT. 
+ MVT getRegVT(MVT VT, const Type *OrigTy, const SDNode *CallNode, + bool IsSoftFloat) const; + + template<typename Ty> + void analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat, + const SDNode *CallNode, const Type *RetTy) const; + CCState &CCInfo; CallingConv::ID CallConv; bool IsO32; @@ -259,45 +300,49 @@ namespace llvm { bool HasMips64, IsN64, IsO32; + private: // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals, + const SDNode *CallNode, const Type *RetTy) const; // Lower Operand specifics - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG& DAG, + SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const; + SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; + SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG& DAG) const; + SDValue lowerShiftRightParts(SDValue Op, SelectionDAG& DAG, bool IsSRA) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue 
lowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerADD(SDValue Op, SelectionDAG &DAG) const; - /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool IsEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval @@ -351,10 +396,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; @@ -393,40 +434,20 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - MachineBasicBlock *EmitBPOSGE32(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, + MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; - MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI, + MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; - MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, + MachineBasicBlock *emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI, + MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *EmitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitSeliT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock *EmitSelT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const; - }; + + /// Create MipsTargetLowering objects. 
+ const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM); + const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM); } #endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 76644c1..ad92d41 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -93,81 +93,11 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const -{ - - MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); - - // Skip all the debug instructions. - while (I != REnd && I->isDebugValue()) - ++I; - - if (I == REnd || !isUnpredicatedTerminator(&*I)) { - // If this block ends with no branches (it just falls through to its succ) - // just return false, leaving TBB/FBB null. - TBB = FBB = NULL; - return false; - } - - MachineInstr *LastInst = &*I; - unsigned LastOpc = LastInst->getOpcode(); - - // Not an analyzable branch (must be an indirect jump). - if (!GetAnalyzableBrOpc(LastOpc)) - return true; - - // Get the second to last instruction in the block. - unsigned SecondLastOpc = 0; - MachineInstr *SecondLastInst = NULL; - - if (++I != REnd) { - SecondLastInst = &*I; - SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); - - // Not an analyzable branch (must be an indirect jump). - if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) - return true; - } - - // If there is only one terminator instruction, process it. - if (!SecondLastOpc) { - // Unconditional branch - if (LastOpc == UncondBrOpc) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // Conditional branch - AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); - return false; - } + bool AllowModify) const { + SmallVector<MachineInstr*, 2> BranchInstrs; + BranchType BT = AnalyzeBranch(MBB, TBB, FBB, Cond, AllowModify, BranchInstrs); - // If we reached here, there are two branches. - // If there are three terminators, we don't know what sort of block this is. - if (++I != REnd && isUnpredicatedTerminator(&*I)) - return true; - - // If second to last instruction is an unconditional branch, - // analyze it and remove the last instruction. - if (SecondLastOpc == UncondBrOpc) { - // Return if the last instruction cannot be removed. - if (!AllowModify) - return true; - - TBB = SecondLastInst->getOperand(0).getMBB(); - LastInst->eraseFromParent(); - return false; - } - - // Conditional branch followed by an unconditional branch. - // The last one must be unconditional. - if (LastOpc != UncondBrOpc) - return true; - - AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - - return false; + return (BT == BT_None) || (BT == BT_Indirect); } void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, @@ -256,6 +186,90 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const return false; } +MipsInstrInfo::BranchType MipsInstrInfo:: +AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify, + SmallVectorImpl<MachineInstr*> &BranchInstrs) const { + + MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); + + // Skip all the debug instructions. + while (I != REnd && I->isDebugValue()) + ++I; + + if (I == REnd || !isUnpredicatedTerminator(&*I)) { + // This block ends with no branches (it just falls through to its succ). 
+ // Leave TBB/FBB null.
+ TBB = FBB = NULL;
+ return BT_NoBranch;
+ }
+
+ MachineInstr *LastInst = &*I;
+ unsigned LastOpc = LastInst->getOpcode();
+ BranchInstrs.push_back(LastInst);
+
+ // Not an analyzable branch (e.g., indirect jump).
+ if (!GetAnalyzableBrOpc(LastOpc))
+ return LastInst->isIndirectBranch() ? BT_Indirect : BT_None;
+
+ // Get the second to last instruction in the block.
+ unsigned SecondLastOpc = 0;
+ MachineInstr *SecondLastInst = NULL;
+
+ if (++I != REnd) {
+ SecondLastInst = &*I;
+ SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode());
+
+ // Not an analyzable branch (must be an indirect jump).
+ if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc)
+ return BT_None;
+ }
+
+ // If there is only one terminator instruction, process it.
+ if (!SecondLastOpc) {
+ // Unconditional branch
+ if (LastOpc == UncondBrOpc) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return BT_Uncond;
+ }
+
+ // Conditional branch
+ AnalyzeCondBr(LastInst, LastOpc, TBB, Cond);
+ return BT_Cond;
+ }
+
+ // If we reached here, there are two branches.
+ // If there are three terminators, we don't know what sort of block this is.
+ if (++I != REnd && isUnpredicatedTerminator(&*I))
+ return BT_None;
+
+ BranchInstrs.insert(BranchInstrs.begin(), SecondLastInst);
+
+ // If second to last instruction is an unconditional branch,
+ // analyze it and remove the last instruction.
+ if (SecondLastOpc == UncondBrOpc) {
+ // Return if the last instruction cannot be removed.
+ if (!AllowModify)
+ return BT_None;
+
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ LastInst->eraseFromParent();
+ BranchInstrs.pop_back();
+ return BT_Uncond;
+ }
+
+ // Conditional branch followed by an unconditional branch.
+ // The last one must be unconditional.
+ if (LastOpc != UncondBrOpc)
+ return BT_None;
+
+ AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+
+ return BT_CondUncond;
+}
+
 /// Return the number of bytes of code the specified instruction may be.
 unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
 switch (MI->getOpcode()) {
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index aca2bc7..3cd9088 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -31,6 +31,15 @@ protected:
 unsigned UncondBrOpc;
 public:
+ enum BranchType {
+ BT_None, // Couldn't analyze branch.
+ BT_NoBranch, // No branches found.
+ BT_Uncond, // One unconditional branch.
+ BT_Cond, // One conditional branch.
+ BT_CondUncond, // A conditional branch followed by an unconditional branch.
+ BT_Indirect // One indirect branch.
+ }; + explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc); static const MipsInstrInfo *create(MipsTargetMachine &TM); @@ -51,6 +60,12 @@ public: virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify, + SmallVectorImpl<MachineInstr*> &BranchInstrs) const; + virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index de09c9e..25b5d24 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -299,6 +299,9 @@ def HI16 : SDNodeXForm<imm, [{ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF); }]>; +// Plus 1. +def Plus1 : SDNodeXForm<imm, [{ return getImm(N, N->getSExtValue() + 1); }]>; + // Node immediate fits as 16-bit sign extended on target immediate. // e.g. addi, andi def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>; @@ -331,6 +334,11 @@ def immLow16Zero : PatLeaf<(imm), [{ // shamt field must fit in 5 bits. def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>; +// True if (N + 1) fits in 16-bit field. +def immSExt16Plus1 : PatLeaf<(imm), [{ + return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1); +}]>; + // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. def addr : @@ -977,7 +985,7 @@ def : InstAlias<"move $dst, $src", (ADDu CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>, Requires<[NotMips64]>; def : InstAlias<"move $dst, $src", - (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 0>, + (OR CPURegsOpnd:$dst, CPURegsOpnd:$src,ZERO), 1>, Requires<[NotMips64]>; def : InstAlias<"bal $offset", (BGEZAL RA, brtarget:$offset), 1>; def : InstAlias<"addu $rs, $rt, $imm", diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 13b2a6a..3c210e7 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -58,7 +58,8 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86..6d76e8a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -18,6 +18,10 @@ def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; } +class Unallocatable { + bit isAllocatable = 0; +} + // We have banks of 32 registers each. 
class MipsReg<bits<16> Enc, string n> : Register<n> { let HWEncoding = Enc; @@ -291,9 +295,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Callee save S0, S1)>; -def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable; -def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,18 +323,19 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; -def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; +def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; +def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>, + Unallocatable; def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp new file mode 100644 index 0000000..e22c3c8 --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -0,0 +1,460 @@ +//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "MipsSEISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". 
+ if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E;) { + MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); + MachineInstr *MI = MO.getParent(); + ++U; + + // Do not replace if it is a phi's operand or is tied to def operand. + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC; + + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. 
addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); +} + +void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + replaceUsesWithZeroReg(MRI, *I); +} + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, DebugLoc DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, + SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry, 0)); +} + +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // if Address is FI, get the TargetFrameIndex. 
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; + } + + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); + return true; +} + +bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + SDNode *Result; + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + return std::make_pair(true, Result); + } + + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? 
Mips::DMULTu : Mips::DMULT); + + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; + Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; + return std::make_pair(true, Result); + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO_64, MVT::i64); + Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); + } else { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO, MVT::i32); + Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, + Zero); + } + + return std::make_pair(true, Result); + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. 
+ for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return std::make_pair(true, RegOpnd); + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return std::make_pair(true, ResNode.getNode()); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { + return new MipsSEDAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h new file mode 100644 index 0000000..6137ab0 --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -0,0 +1,57 @@ +//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEISELDAGTODAG_H +#define MIPSSEISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { + +public: + explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); + + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + + SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + DebugLoc DL, SDNode *Node) const; + + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); +}; + +FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp new file mode 100644 index 0000000..287e2ed --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -0,0 +1,197 @@ +//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// +#include "MipsSEISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + +MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + + if (HasMips64) + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->hasDSP()) { + MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; + + for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { + addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VecTys[i], Expand); + + setOperationAction(ISD::LOAD, VecTys[i], Legal); + setOperationAction(ISD::STORE, VecTys[i], Legal); + setOperationAction(ISD::BITCAST, VecTys[i], Legal); + } + } + + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); + else + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); + } + } + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { + return new MipsSETargetLowering(TM); +} + + +bool +MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } +} + +MachineBasicBlock * +MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::BPOSGE32_PSEUDO: + return emitBPOSGE32(MI, BB); + } +} + +bool MipsSETargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; + + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. 
+  return NextStackOffset <= FI.getIncomingArgSize();
+}
+
+void MipsSETargetLowering::
+getOpndList(SmallVectorImpl<SDValue> &Ops,
+            std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
+            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
+            CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
+  // T9 should contain the address of the callee function if
+  // -relocation-model=pic or it is an indirect call.
+  if (IsPICCall || !GlobalOrExternal) {
+    unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+    RegsToPass.push_front(std::make_pair(T9Reg, Callee));
+  } else
+    Ops.push_back(Callee);
+
+  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
+                                  InternalLinkage, CLI, Callee, Chain);
+}
+
+MachineBasicBlock * MipsSETargetLowering::
+emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{
+  // $bb:
+  //  bposge32_pseudo $vr0
+  //  =>
+  // $bb:
+  //  bposge32 $tbb
+  // $fbb:
+  //  li $vr2, 0
+  //  b $sink
+  // $tbb:
+  //  li $vr1, 1
+  // $sink:
+  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)
+
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const TargetRegisterClass *RC = &Mips::CPURegsRegClass;
+  DebugLoc DL = MI->getDebugLoc();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB));
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, FBB);
+  F->insert(It, TBB);
+  F->insert(It, Sink);
+
+  // Transfer the remainder of BB and its successor edges to Sink.
+  Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)),
+               BB->end());
+  Sink->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Add successors.
+  BB->addSuccessor(FBB);
+  BB->addSuccessor(TBB);
+  FBB->addSuccessor(Sink);
+  TBB->addSuccessor(Sink);
+
+  // Insert the real bposge32 instruction to $BB.
+  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
+
+  // Fill $FBB.
+  unsigned VR2 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
+    .addReg(Mips::ZERO).addImm(0);
+  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
+
+  // Fill $TBB.
+  unsigned VR1 = RegInfo.createVirtualRegister(RC);
+  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
+    .addReg(Mips::ZERO).addImm(1);
+
+  // Insert phi function to $Sink.
+  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
+          MI->getOperand(0).getReg())
+    .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB);
+
+  MI->eraseFromParent(); // The pseudo instruction is gone now.
+  return Sink;
+}
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
new file mode 100644
index 0000000..04a28ce
--- /dev/null
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -0,0 +1,46 @@
+//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Subclass of MipsTargetLowering specialized for mips32/64.
+// +//===----------------------------------------------------------------------===// + +#ifndef MipsSEISELLOWERING_H +#define MipsSEISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class MipsSETargetLowering : public MipsTargetLowering { + public: + explicit MipsSETargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; + }; +} + +#endif // MipsSEISELLOWERING_H diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 75b4c98..e11e5d1 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false), HasBitCount(false), HasFPIdx(false), InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - IsAndroid(false), RM(_RM) + RM(_RM) { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 32baa3d..7a2e47c 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -95,9 +95,6 @@ protected: // HasDSP, HasDSPR2 -- supports DSP ASE. bool HasDSP, HasDSPR2; - // IsAndroid -- target is android - bool IsAndroid; - InstrItineraryData InstrItins; // The instance to the register info section object @@ -144,7 +141,6 @@ public: bool inMicroMipsMode() const { return InMicroMipsMode; } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } - bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } bool useSmallSection() const { return UseSmallSection; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 1b91e8b..3336358 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } @@ -116,6 +116,8 @@ bool MipsPassConfig::addPreEmitPass() { // NOTE: long branch has not been implemented for mips16. 
if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) addPass(createMipsLongBranchPass(TM)); + if (TM.getSubtarget<MipsSubtarget>().inMips16Mode()) + addPass(createMipsConstantIslandPass(TM)); return true; } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c4928c2..7e5f192 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine { const DataLayout DL; // Calculates type size & alignment OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; - MipsTargetLowering TLInfo; + OwningPtr<const MipsTargetLowering> TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -63,7 +63,7 @@ public: } virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; + return TLInfo.get(); } virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 5ee747a..e9a9fbf 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -101,7 +101,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) // Operations not directly supported by NVPTX. setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -716,16 +722,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, for (unsigned i=0,e=Ins.size(); i!=e; ++i) { unsigned sz = Ins[i].VT.getSizeInBits(); if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; - std::vector<EVT> LoadRetVTs; - LoadRetVTs.push_back(Ins[i].VT); - LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - std::vector<SDValue> LoadRetOps; - LoadRetOps.push_back(Chain); - LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); - LoadRetOps.push_back(InFlag); + EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; + SDValue LoadRetOps[] = { + Chain, + DAG.getConstant(1, MVT::i32), + DAG.getConstant(resoffset, MVT::i32), + InFlag + }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, - &LoadRetOps[0], LoadRetOps.size()); + LoadRetOps, array_lengthof(LoadRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); InVals.push_back(retval); @@ -750,16 +755,15 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } std::vector<SDValue> tempRetVals; for (unsigned j=0; j<numelems; ++j) { - std::vector<EVT> MoveRetVTs; - MoveRetVTs.push_back(elemtype); - MoveRetVTs.push_back(MVT::Other); MoveRetVTs.push_back(MVT::Glue); - std::vector<SDValue> MoveRetOps; - MoveRetOps.push_back(Chain); - MoveRetOps.push_back(DAG.getConstant(0, MVT::i32)); - MoveRetOps.push_back(DAG.getConstant(paramNum, MVT::i32)); - MoveRetOps.push_back(InFlag); + EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; + SDValue MoveRetOps[] = { + Chain, 
+ DAG.getConstant(0, MVT::i32), + DAG.getConstant(paramNum, MVT::i32), + InFlag + }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, - &MoveRetOps[0], MoveRetOps.size()); + MoveRetOps, array_lengthof(MoveRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); tempRetVals.push_back(retval); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 95e7b55..14afc14 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -136,7 +136,7 @@ public: NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getShiftAmountTy(EVT LHSTy) const { + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f77..709daa4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a42092..38a7420 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e138..b0680fb 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,6 +17,10 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc48..ecece8c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. 
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). + // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). 
} } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6..353560d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -188,13 +188,26 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +236,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. 
// No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +255,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +265,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -311,11 +329,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; // Get processor type. @@ -780,7 +794,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +836,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +852,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! 
if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -1031,6 +1031,37 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || + (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } } bool diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47f..53ee326 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -44,7 +46,9 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +148,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. 
{PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index cf1f459..741e25e 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -1180,13 +1183,15 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); } else return false; @@ -1205,6 +1210,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -4786,12 +4795,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - std::vector<EVT> NodeTys; SDValue MFFSreg, InFlag; // Save FP Control Word to register - NodeTys.push_back(MVT::f64); // return register - NodeTys.push_back(MVT::Glue); // unused in this context + EVT NodeTys[] = { + MVT::f64, // return register + MVT::Glue // unused in this context + }; SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot @@ -5408,9 +5418,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, MVT::i32) }; - std::vector<EVT> VTs; - VTs.push_back(Op.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Now that we have the comparison, emit a copy from the CR to a GPR. @@ -6466,14 +6474,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. - std::vector<EVT> VTs; SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, MVT::i32) }; - VTs.push_back(LHS.getOperand(2).getValueType()); - VTs.push_back(MVT::Glue); + EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); // Unpack the result based on how the target uses it. @@ -6854,6 +6860,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. 
+ // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f5d418c..8d44d9f 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -329,7 +329,7 @@ namespace llvm { /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType virtual EVT getSetCCResultType(EVT VT) const; @@ -449,6 +449,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 0120130..bca1bd5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -555,7 +555,8 @@ def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set G8RC:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, @@ -648,7 +649,7 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; + [(set G8RC:$rD, (aligned4load ixaddr:$src))]>, isPPC64; def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), "ld $rD, $src", LdStLD, []>, isPPC64; @@ -682,6 +683,10 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; +let isCodeGenOnly = 1 in +def LDXu : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), + "ldx $rD, $src", LdStLD, + [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; let mayLoad = 1 in def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), @@ -798,7 +803,7 @@ def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), // Normal 8-byte stores. 
def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store G8RC:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, @@ -833,8 +838,9 @@ def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, + [(set ptr_rc:$ea_res, + (aligned4pre_store G8RC:$rS, ptr_rc:$ptrreg, + iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, isPPC64; @@ -979,3 +985,13 @@ def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), (ADDIS8 G8RC:$in, tjumptable:$g)>; def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), (ADDIS8 G8RC:$in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store G8RC:$rS, xoaddr:$dst), + (STDX G8RC:$rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae..0ed7ff2 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -182,6 +182,9 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -733,3 +736,6 @@ def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), (VRFIZ VRRC:$vA)>; def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), (VRFIN VRRC:$vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a8..7fe7880 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -444,7 +439,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (SrcReg != PPC::LR) { @@ -489,47 +485,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. 
- // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +522,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? 
PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +546,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,7 +572,8 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const{ if (PPC::GPRCRegClass.hasSubClassEq(RC)) { if (DestReg != PPC::LR) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), @@ -642,37 +599,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +632,12 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. 
Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +655,17 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, NonRI)) FuncInfo->setSpillsCR(); - } + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213e..5d4ae91 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e943..3f181aa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -278,6 +278,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. 
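The comment above is the key constraint behind the aligned4*/unaligned4* fragments that follow: DS-form instructions encode only Offset >> 2, so both the base alignment and the low two bits of the offset matter. A small, self-contained sketch of that encoding check (the displacement range is the signed 16-bit byte offset the backend tests for elsewhere in this patch):

    #include <cstdint>
    #include <cstdio>

    // Can a DS-form instruction (ld/std/lwa/stdu) address Base+Offset directly?
    static bool fitsDSForm(int64_t Offset, unsigned BaseAlign) {
      if (BaseAlign < 4)
        return false;                              // base itself may be misaligned
      if (Offset & 3)
        return false;                              // low two bits are not encodable
      return Offset >= -32768 && Offset <= 32767;  // signed 16-bit displacement
    }

    int main() {
      printf("%d\n", fitsDSForm(8, 8));        // ok: ld/std with offset 8
      printf("%d\n", fitsDSForm(6, 2));        // no: needs the X-form (ldx/stdx)
      printf("%d\n", fitsDSForm(1 << 20, 16)); // no: offset too large
    }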
+ +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375..b1636a2 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,6 +37,13 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; @@ -78,6 +85,8 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), LRStoreRequired(false), MinReservedArea(0), @@ -109,6 +118,12 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc..e2c7221 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? 
PPC::LR8 : PPC::LR, @@ -89,12 +71,6 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; } -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); -} - - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -139,12 +115,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -162,12 +132,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -199,38 +163,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -271,28 +207,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? 
G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +226,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -369,8 +286,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -412,8 +328,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, (void) RS; bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + unsigned Reg = LP64 ? PPC::X0 : PPC::R0; unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -499,14 +414,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex, SPAdj, RS); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex, SPAdj, RS); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -529,7 +442,25 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, isIXAddr = true; break; } - + + bool noImmForm = false; + switch (OpC) { + case PPC::LVEBX: + case PPC::LVEHX: + case PPC::LVEWX: + case PPC::LVX: + case PPC::LVXL: + case PPC::LVSL: + case PPC::LVSR: + case PPC::STVEBX: + case PPC::STVEHX: + case PPC::STVEWX: + case PPC::STVX: + case PPC::STVXL: + noImmForm = true; + break; + } + // Now add the frame object offset to the offset from r1. 
int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +484,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,13 +495,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned SReg = MF.getRegInfo().createVirtualRegister(is64Bit ? G8RC : GPRC); // Insert a set of rA with the full offset value before the ld, st, or add BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) @@ -584,7 +512,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666..5f89f63 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -47,13 +47,18 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; - - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. - bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + /// We require the register scavenger. 
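When the displacement in eliminateFrameIndex does not fit the immediate form, the code above now builds it in a scavenged virtual register starting with a lis of the high half; the low half is assumed here to be OR-ed in afterwards, since only the lis is visible in the hunk. A tiny worked example of that 32-bit high/low split (illustrative values only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A frame offset too large for a 16-bit displacement field.
      int32_t Offset = 0x12345678;

      uint16_t Hi = (uint32_t)Offset >> 16;  // lis  rS, Hi      (loads Hi << 16)
      uint16_t Lo = Offset & 0xFFFF;         // ori  rS, rS, Lo  (assumed low half)

      int32_t Rebuilt = (int32_t)(((uint32_t)Hi << 16) | Lo);
      printf("offset  = 0x%08x\n", Offset);
      printf("hi/lo   = 0x%04x / 0x%04x\n", Hi, Lo);
      printf("rebuilt = 0x%08x\n", Rebuilt);  // matches the original offset
    }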
+ bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } void lowerDynamicAlloc(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const; diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index ba87918..e099a9f 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,11 +23,9 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); -FunctionPass *createR600LowerConstCopy(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); -FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 40f4741..1a26c77 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -38,3 +38,4 @@ include "AMDGPUInstrInfo.td" include "AMDGPUIntrinsics.td" include "AMDGPURegisterInfo.td" include "AMDGPUInstructions.td" +include "AMDGPUCallingConv.td" diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index c30dbe4..f600144 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -141,5 +141,5 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) { SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); OutStreamer.EmitIntValue(MaxSGPR + 1, 4); OutStreamer.EmitIntValue(MaxVGPR + 1, 4); - OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4); + OutStreamer.EmitIntValue(MFI->PSInputAddr, 4); } diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td new file mode 100644 index 0000000..45ae37e --- /dev/null +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -0,0 +1,42 @@ +//===---- AMDCallingConv.td - Calling Conventions for Radeon GPUs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the AMD Radeon GPUs. 
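The new AMDGPUCallingConv.td below routes scalar arguments marked inreg to SGPRs and everything else to VGPRs. A toy model of that split as a plain C++ sketch (the real assignment is generated from the TableGen description; argument list and register counts here are illustrative):

    #include <cstdio>

    struct Arg {
      bool InReg;  // corresponds to the "inreg" attribute / ArgFlags.isInReg()
    };

    int main() {
      Arg Args[] = {{true}, {true}, {false}, {false}, {true}};
      unsigned NextSGPR = 0, NextVGPR = 0;
      for (unsigned i = 0; i != sizeof(Args) / sizeof(Args[0]); ++i) {
        if (Args[i].InReg)
          printf("arg %u -> SGPR%u\n", i, NextSGPR++);  // taken from the SGPR pool
        else
          printf("arg %u -> VGPR%u\n", i, NextVGPR++);  // taken from the VGPR pool
      }
    }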
+// +//===----------------------------------------------------------------------===// + +// Inversion of CCIfInReg +class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {} + +// Calling convention for SI +def CC_SI : CallingConv<[ + + CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[ + SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, + SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15 + ]>>>, + + CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow< + [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ], + [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR12, SGPR15 ] + >>>, + + CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[ + VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, + VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, + VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, + VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31 + ]>>> + +]>; + +def CC_AMDGPU : CallingConv<[ + CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"# + "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>> +]>; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 0a33264..5995b6f 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -14,7 +14,10 @@ //===----------------------------------------------------------------------===// #include "AMDGPUISelLowering.h" +#include "AMDGPURegisterInfo.h" #include "AMDILIntrinsicInfo.h" +#include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -22,6 +25,8 @@ using namespace llvm; +#include "AMDGPUGenCallingConv.inc" + AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { @@ -64,17 +69,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // TargetLowering Callbacks //===---------------------------------------------------------------------===// -SDValue AMDGPUTargetLowering::LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { - for (unsigned i = 0, e = Ins.size(); i < e; ++i) { - InVals.push_back(SDValue()); - } - return Chain; +void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, + const SmallVectorImpl<ISD::InputArg> &Ins) const { + + State.AnalyzeFormalArguments(Ins, CC_AMDGPU); } SDValue AMDGPUTargetLowering::LowerReturn( diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 9e7d997..f31b646 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -39,15 +39,12 @@ protected: bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; + void AnalyzeFormalArguments(CCState &State, + const SmallVectorImpl<ISD::InputArg> &Ins) const; + public: AMDGPUTargetLowering(TargetMachine &TM); - virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; - virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp 
b/lib/Target/R600/AMDGPUIndirectAddressing.cpp index 15840b3..ed6c8ec 100644 --- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp @@ -289,7 +289,6 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) { // We only need to use REG_SEQUENCE for explicit defs, since the // register coalescer won't do anything with the implicit defs. - MachineInstr *DefInstr = MRI.getVRegDef(Reg); if (!regHasExplicitDef(MRI, Reg)) { continue; } diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 960f108..e740348 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -132,13 +132,6 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst < [(set rc:$dst, (fneg rc:$src0))] >; -def SHADER_TYPE : AMDGPUShaderInst < - (outs), - (ins i32imm:$type), - "SHADER_TYPE $type", - [(int_AMDGPU_shader_type imm:$type)] ->; - } // usesCustomInserter = 1 multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass, @@ -209,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass, (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) >; -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < +class Vector4_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 2ba2d4b..eecb25b 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -50,8 +50,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - - def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index 26f842e..b723433 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -243,6 +243,7 @@ public: initializeRegionInfoPass(*PassRegistry::getPassRegistry()); } + using Pass::doInitialization; virtual bool doInitialization(Region *R, RGPassManager &RGM); virtual bool runOnRegion(Region *R, RGPassManager &RGM); diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index e2f00be..0185747 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -17,6 +17,7 @@ #include "AMDGPU.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" +#include "R600MachineScheduler.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" @@ -39,6 +40,14 @@ extern "C" void LLVMInitializeR600Target() { RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget); } +static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { + return new ScheduleDAGMI(C, new R600SchedStrategy()); +} + +static MachineSchedRegistry +SchedCustomRegistry("r600", "Run R600's custom scheduler", + 
createR600MachineScheduler); + AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -70,7 +79,13 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { + enablePass(&MachineSchedulerID); + MachineSchedRegistry::setDefault(createR600MachineScheduler); + } + } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM<AMDGPUTargetMachine>(); @@ -112,11 +127,6 @@ bool AMDGPUPassConfig::addInstSelector() { } bool AMDGPUPassConfig::addPreRegAlloc() { - const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); - - if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { - addPass(createSIAssignInterpRegsPass(*TM)); - } addPass(createAMDGPUConvertToISAPass(*TM)); return false; } @@ -143,7 +153,6 @@ bool AMDGPUPassConfig::addPreEmitPass() { addPass(createAMDGPUCFGStructurizerPass(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); addPass(&FinalizeMachineBundlesID); - addPass(createR600LowerConstCopy(*TM)); } else { addPass(createSILowerControlFlowPass(*TM)); } diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h index b39fbdb..39ab664 100644 --- a/lib/Target/R600/AMDIL.h +++ b/lib/Target/R600/AMDIL.h @@ -96,24 +96,23 @@ enum AddressSpaces { ADDRESS_NONE = 5, ///< Address space for unknown memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) - USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI - CONSTANT_BUFFER_0 = 9, - CONSTANT_BUFFER_1 = 10, - CONSTANT_BUFFER_2 = 11, - CONSTANT_BUFFER_3 = 12, - CONSTANT_BUFFER_4 = 13, - CONSTANT_BUFFER_5 = 14, - CONSTANT_BUFFER_6 = 15, - CONSTANT_BUFFER_7 = 16, - CONSTANT_BUFFER_8 = 17, - CONSTANT_BUFFER_9 = 18, - CONSTANT_BUFFER_10 = 19, - CONSTANT_BUFFER_11 = 20, - CONSTANT_BUFFER_12 = 21, - CONSTANT_BUFFER_13 = 22, - CONSTANT_BUFFER_14 = 23, - CONSTANT_BUFFER_15 = 24, - LAST_ADDRESS = 25 + CONSTANT_BUFFER_0 = 8, + CONSTANT_BUFFER_1 = 9, + CONSTANT_BUFFER_2 = 10, + CONSTANT_BUFFER_3 = 11, + CONSTANT_BUFFER_4 = 12, + CONSTANT_BUFFER_5 = 13, + CONSTANT_BUFFER_6 = 14, + CONSTANT_BUFFER_7 = 15, + CONSTANT_BUFFER_8 = 16, + CONSTANT_BUFFER_9 = 17, + CONSTANT_BUFFER_10 = 18, + CONSTANT_BUFFER_11 = 19, + CONSTANT_BUFFER_12 = 20, + CONSTANT_BUFFER_13 = 21, + CONSTANT_BUFFER_14 = 22, + CONSTANT_BUFFER_15 = 23, + LAST_ADDRESS = 24 }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index aa8ab6b..b0cd0f9 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -2595,6 +2595,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static int getBranchNzeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; case AMDGPU::BRANCH_COND_i32: case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; @@ -2606,6 +2607,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static int getBranchZeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; case AMDGPU::BRANCH_COND_i32: case AMDGPU::BRANCH_COND_f32: 
return AMDGPU::IF_LOGICALZ_f32; @@ -2617,6 +2619,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static int getContinueNzeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; default: assert(0 && "internal error"); @@ -2626,6 +2629,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static int getContinueZeroOpcode(int oldOpcode) { switch(oldOpcode) { + case AMDGPU::JUMP_COND: case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; default: assert(0 && "internal error"); @@ -2654,8 +2658,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static bool isCondBranch(MachineInstr *instr) { switch (instr->getOpcode()) { - case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; + case AMDGPU::JUMP_COND: case AMDGPU::BRANCH_COND_i32: case AMDGPU::BRANCH_COND_f32: break; @@ -2668,7 +2671,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> { static bool isUncondBranch(MachineInstr *instr) { switch (instr->getOpcode()) { case AMDGPU::JUMP: - return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; case AMDGPU::BRANCH: return true; default: diff --git a/lib/Target/R600/AMDILDevice.cpp b/lib/Target/R600/AMDILDevice.cpp index eec5059..db8e01e 100644 --- a/lib/Target/R600/AMDILDevice.cpp +++ b/lib/Target/R600/AMDILDevice.cpp @@ -115,10 +115,18 @@ bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const { std::string AMDGPUDevice::getDataLayout() const { - return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" - "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" - "-v512:512:512-v1024:1024:1024-v2048:2048:2048" - "-n8:16:32:64"); + std::string DataLayout = std::string( + "e" + "-p:32:32:32" + "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128" + "-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n32:64" + ); + + if (usesHardware(AMDGPUDeviceInfo::DoubleOps)) { + DataLayout.append("-f64:64:64"); + } + + return DataLayout; } diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index e77b9dc..fa8f62d 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -162,6 +162,35 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { } switch (Opc) { default: break; + case ISD::BUILD_VECTOR: { + const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) { + break; + } + // BUILD_VECTOR is usually lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG + // that adds a 128 bits reg copy when going through TwoAddressInstructions + // pass. We want to avoid 128 bits copies as much as possible because they + // can't be bundled by our scheduler. 
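The BUILD_VECTOR selection described in the comment above (and implemented just below) emits a single REG_SEQUENCE instead of an IMPLICIT_DEF plus four INSERT_SUBREGs. REG_SEQUENCE takes a register-class id followed by interleaved (value, sub-register index) pairs; a short sketch of how that operand array is laid out for a four-element vector (names illustrative):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      const char *Elems[]  = {"%x", "%y", "%z", "%w"};
      const char *SubIdx[] = {"sub0", "sub1", "sub2", "sub3"};

      // Operand list: class id first, then one (value, subreg) pair per element.
      std::vector<std::string> Ops;
      Ops.push_back("R600_Reg128RegClassID");
      for (unsigned i = 0; i != 4; ++i) {
        Ops.push_back(Elems[i]);
        Ops.push_back(SubIdx[i]);
      }

      printf("REG_SEQUENCE");
      for (size_t i = 0; i != Ops.size(); ++i)
        printf(" %s", Ops[i].c_str());
      printf("  (%zu operands = 2*4 + 1)\n", Ops.size());
    }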
+ SDValue RegSeqArgs[9] = { + CurDAG->getTargetConstant(AMDGPU::R600_Reg128RegClassID, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub2, MVT::i32), + SDValue(), CurDAG->getTargetConstant(AMDGPU::sub3, MVT::i32) + }; + bool IsRegSeq = true; + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { + IsRegSeq = false; + break; + } + RegSeqArgs[2 * i + 1] = N->getOperand(i); + } + if (!IsRegSeq) + break; + return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), + RegSeqArgs, 2 * N->getNumOperands() + 1); + } case ISD::ConstantFP: case ISD::Constant: { const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); @@ -336,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index f65e1f3..922cac1 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -33,11 +33,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// -#include "AMDGPUGenCallingConv.inc" - -//===----------------------------------------------------------------------===// // TargetLowering Implementation Help Functions End //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDILSIDevice.cpp b/lib/Target/R600/AMDILSIDevice.cpp index 3096c22..0d1de3d 100644 --- a/lib/Target/R600/AMDILSIDevice.cpp +++ b/lib/Target/R600/AMDILSIDevice.cpp @@ -36,10 +36,13 @@ AMDGPUSIDevice::getGeneration() const { std::string AMDGPUSIDevice::getDataLayout() const { - return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" - "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" - "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" - "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" - "-v512:512:512-v1024:1024:1024-v2048:2048:2048" - "-n8:16:32:64"); + return std::string( + "e" + "-p:64:64:64" + 
"-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128" + "-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024" + "-v2048:2048:2048" + "-n32:64" + ); } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 00f8b10..63c59e1 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -37,11 +37,10 @@ add_llvm_target(R600CodeGen R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp - R600LowerConstCopy.cpp R600MachineFunctionInfo.cpp + R600MachineScheduler.cpp R600RegisterInfo.cpp SIAnnotateControlFlow.cpp - SIAssignInterpRegs.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h index 8721f80..cd3a7ce 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -33,15 +33,6 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { return 0; } - - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups) const { - return 0; - } }; } // End namespace llvm diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index 6cc0077..e27abcc 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -42,9 +42,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { const MCSubtargetInfo &STI; MCContext &Ctx; - /// \brief Encode a sequence of registers with the correct alignment. - unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const; - /// \brief Can this operand also contain immediate values? bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; @@ -65,14 +62,6 @@ public: /// \returns the encoding for an MCOperand. 
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const; - - /// \brief Encoding for when 2 consecutive registers are used - virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; - - /// \brief Encoding for when 4 consectuive registers are used - virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const; }; } // End anonymous namespace @@ -212,24 +201,3 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, return 0; } -//===----------------------------------------------------------------------===// -// Custom Operand Encodings -//===----------------------------------------------------------------------===// - -unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo, - unsigned shift) const { - unsigned regCode = MRI.getEncodingValue(MI.getOperand(OpNo).getReg()); - return (regCode & 0xff) >> shift; -} - -unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI, - unsigned OpNo , - SmallVectorImpl<MCFixup> &Fixup) const { - return GPRAlign(MI, OpNo, 1); -} - -unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI, - unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixup) const { - return GPRAlign(MI, OpNo, 2); -} diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b5c2a93..a73691d 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -50,8 +50,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::UREM, MVT::v4i32, Expand); setOperationAction(ISD::SETCC, MVT::v4i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::FSUB, MVT::f32, Expand); @@ -65,8 +65,8 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); @@ -94,6 +94,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::SELECT_CC); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setSchedulingPreference(Sched::VLIW); } @@ -105,7 +106,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - case AMDGPU::SHADER_TYPE: break; case AMDGPU::CLAMP_R600: { MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV, @@ -150,7 +150,13 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); break; - + case AMDGPU::CONST_COPY: { + MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV, + MI->getOperand(0).getReg(), AMDGPU::ALU_CONST); + TII->setImmOperand(NewMI, R600Operands::SRC0_SEL, + MI->getOperand(1).getImm()); + break; + } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { @@ 
-215,8 +221,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::BRANCH: BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) - .addOperand(MI->getOperand(0)) - .addReg(0); + .addOperand(MI->getOperand(0)); break; case AMDGPU::BRANCH_COND_f32: { @@ -227,7 +232,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addImm(OPCODE_IS_NOT_ZERO) .addImm(0); // Flags TII->addFlag(NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) .addOperand(MI->getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); break; @@ -241,7 +246,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( .addImm(OPCODE_IS_NOT_ZERO_INT) .addImm(0); // Flags TII->addFlag(NewMI, 0, MO_FLAG_PUSH); - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND)) .addOperand(MI->getOperand(0)) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); break; @@ -306,11 +311,9 @@ using namespace llvm::AMDGPUIntrinsic; SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::FPOW: return LowerFPOW(Op, DAG); @@ -470,44 +473,6 @@ SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { ); } -SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - SDValue CC = Op.getOperand(1); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue JumpT = Op.getOperand(4); - SDValue CmpValue; - SDValue Result; - - if (LHS.getValueType() == MVT::i32) { - CmpValue = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::i32, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - } else if (LHS.getValueType() == MVT::f32) { - CmpValue = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::f32, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - } else { - assert(0 && "Not valid type for br_cc"); - } - Result = DAG.getNode( - AMDGPUISD::BRANCH_COND, - CmpValue.getDebugLoc(), - MVT::Other, Chain, - JumpT, CmpValue); - return Result; -} - SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, DebugLoc DL, unsigned DwordOffset) const { @@ -576,12 +541,37 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const // Check if we can lower this to a native operation. + // Try to lower to a SET* instruction: + // + // SET* can match the following patterns: + // + // select_cc f32, f32, -1, 0, cc_any + // select_cc f32, f32, 1.0f, 0.0f, cc_any + // select_cc i32, i32, -1, 0, cc_any + // + + // Move hardware True/False values to the correct operand. 
+ if (isHWTrueValue(False) && isHWFalseValue(True)) { + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); + std::swap(False, True); + CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32)); + } + + if (isHWTrueValue(True) && isHWFalseValue(False) && + (CompareVT == VT || VT == MVT::i32)) { + // This can be matched by a SET* instruction. + return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); + } + // Try to lower to a CND* instruction: - // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that - // can be lowered to CND* instructions can also be lowered to SET* - // instructions. CND* instructions are cheaper, because they dont't - // require additional instructions to convert their result to the correct - // value type, so this check should be first. + // + // CND* can match the following patterns: + // + // select_cc f32, 0.0, f32, f32, cc_any + // select_cc f32, 0.0, i32, i32, cc_any + // select_cc i32, 0, f32, f32, cc_any + // select_cc i32, 0, i32, i32, cc_any + // if (isZero(LHS) || isZero(RHS)) { SDValue Cond = (isZero(LHS) ? RHS : LHS); SDValue Zero = (isZero(LHS) ? LHS : RHS); @@ -623,38 +613,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); } - // Try to lower to a SET* instruction: - // - // CompareVT == MVT::f32 and VT == MVT::i32 is supported by the hardware, - // but for the other case where CompareVT != VT, all operands of - // SELECT_CC need to have the same value type, so we need to change True and - // False to be the same type as LHS and RHS, and then convert the result of - // the select_cc back to the correct type. - - // Move hardware True/False values to the correct operand. - if (isHWTrueValue(False) && isHWFalseValue(True)) { - ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); - std::swap(False, True); - CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32)); - } - - if (isHWTrueValue(True) && isHWFalseValue(False)) { - if (CompareVT != VT && VT == MVT::f32 && CompareVT == MVT::i32) { - SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - // Convert integer values of true (-1) and false (0) to fp values of - // true (1.0f) and false (0.0f). - SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, - DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); - } else { - // This SELECT_CC is already legal. 
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); - } - } // Possible Min/Max pattern SDValue MinMax = LowerMinMax(Op, DAG); @@ -698,48 +656,6 @@ SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getCondCode(ISD::SETNE)); } -SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond; - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue CC = Op.getOperand(2); - DebugLoc DL = Op.getDebugLoc(); - assert(Op.getValueType() == MVT::i32); - if (LHS.getValueType() == MVT::i32) { - Cond = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::i32, - LHS, RHS, - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - CC); - } else if (LHS.getValueType() == MVT::f32) { - Cond = DAG.getNode( - ISD::SELECT_CC, - Op.getDebugLoc(), - MVT::f32, - LHS, RHS, - DAG.getConstantFP(1.0f, MVT::f32), - DAG.getConstantFP(0.0f, MVT::f32), - CC); - Cond = DAG.getNode( - ISD::FP_TO_SINT, - DL, - MVT::i32, - Cond); - } else { - assert(0 && "Not valid type for set_cc"); - } - Cond = DAG.getNode( - ISD::AND, - DL, - MVT::i32, - DAG.getConstant(1, MVT::i32), - Cond); - return Cond; -} - /// LLVM generates byte-addresed pointers. For indirect addressing, we need to /// convert these pointers to a register index. Each register holds /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the @@ -918,7 +834,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const if (ConstantBlock > -1) { SDValue Result; if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) || - dyn_cast<Constant>(LoadNode->getSrcValue())) { + dyn_cast<Constant>(LoadNode->getSrcValue()) || + dyn_cast<ConstantSDNode>(Ptr)) { SDValue Slots[4]; for (unsigned i = 0; i < 4; i++) { // We want Const position encoded with the following formula : @@ -934,7 +851,9 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const } else { // non constant ptr cant be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, - DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) + DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)), + DAG.getConstant(LoadNode->getAddressSpace() - + AMDGPUAS::CONSTANT_BUFFER_0, MVT::i32) ); } @@ -1122,6 +1041,9 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SELECT_CC: { // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> // selectcc x, y, a, b, inv(cc) + // + // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne -> + // selectcc x, y, a, b, cc SDValue LHS = N->getOperand(0); if (LHS.getOpcode() != ISD::SELECT_CC) { return SDValue(); @@ -1130,24 +1052,30 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, SDValue RHS = N->getOperand(1); SDValue True = N->getOperand(2); SDValue False = N->getOperand(3); + ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get(); if (LHS.getOperand(2).getNode() != True.getNode() || LHS.getOperand(3).getNode() != False.getNode() || - RHS.getNode() != False.getNode() || - cast<CondCodeSDNode>(N->getOperand(4))->get() != ISD::SETEQ) { + RHS.getNode() != False.getNode()) { return SDValue(); } - ISD::CondCode CCOpcode = cast<CondCodeSDNode>(LHS->getOperand(4))->get(); - CCOpcode = ISD::getSetCCInverse( - CCOpcode, LHS.getOperand(0).getValueType().isInteger()); - return DAG.getSelectCC(N->getDebugLoc(), - LHS.getOperand(0), - LHS.getOperand(1), - LHS.getOperand(2), - LHS.getOperand(3), - CCOpcode); + switch 
(NCC) { + default: return SDValue(); + case ISD::SETNE: return LHS; + case ISD::SETEQ: { + ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get(); + LHSCC = ISD::getSetCCInverse(LHSCC, + LHS.getOperand(0).getValueType().isInteger()); + return DAG.getSelectCC(N->getDebugLoc(), + LHS.getOperand(0), + LHS.getOperand(1), + LHS.getOperand(2), + LHS.getOperand(3), + LHSCC); } + } + } case AMDGPUISD::EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index afa3897..5cb4b91 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -52,14 +52,11 @@ private: void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - /// \brief Lower ROTL opcode to BITALIGN SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 7e3f005..0865098 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); @@ -168,6 +222,11 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB, return NULL; } +static +bool isJump(unsigned Opcode) { + return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; +} + bool R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, 
MachineBasicBlock *&TBB, @@ -186,7 +245,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return false; --I; } - if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) { + if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) { return false; } @@ -196,22 +255,20 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // If there is only one terminator instruction, process it. unsigned LastOpc = LastInst->getOpcode(); if (I == MBB.begin() || - static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) { + !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) { if (LastOpc == AMDGPU::JUMP) { - if(!isPredicated(LastInst)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - MachineInstr *predSet = I; - while (!isPredicateSetter(predSet->getOpcode())) { - predSet = --I; - } - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(predSet->getOperand(1)); - Cond.push_back(predSet->getOperand(2)); - Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); - return false; + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (LastOpc == AMDGPU::JUMP_COND) { + MachineInstr *predSet = I; + while (!isPredicateSetter(predSet->getOpcode())) { + predSet = --I; } + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(predSet->getOperand(1)); + Cond.push_back(predSet->getOperand(2)); + Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); + return false; } return true; // Can't handle indirect branch. } @@ -221,10 +278,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned SecondLastOpc = SecondLastInst->getOpcode(); // If the block ends with a B and a Bcc, handle it. - if (SecondLastOpc == AMDGPU::JUMP && - isPredicated(SecondLastInst) && - LastOpc == AMDGPU::JUMP && - !isPredicated(LastInst)) { + if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) { MachineInstr *predSet = --I; while (!isPredicateSetter(predSet->getOpcode())) { predSet = --I; @@ -261,7 +315,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, if (FBB == 0) { if (Cond.empty()) { - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); return 1; } else { MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); @@ -269,7 +323,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, addFlag(PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); return 1; @@ -279,10 +333,10 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, assert(PredSet && "No previous predicate !"); addFlag(PredSet, 0, MO_FLAG_PUSH); PredSet->getOperand(2).setImm(Cond[1].getImm()); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)) + BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) .addMBB(TBB) .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); - BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0); + BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); return 2; } } @@ -302,11 +356,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { switch (I->getOpcode()) { default: return 0; + case AMDGPU::JUMP_COND: { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + clearFlag(predSet, 0, MO_FLAG_PUSH); + I->eraseFromParent(); + break; + } case AMDGPU::JUMP: - if (isPredicated(I)) { - MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); - } 
I->eraseFromParent(); break; } @@ -320,11 +376,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { // FIXME: only one case?? default: return 1; + case AMDGPU::JUMP_COND: { + MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); + clearFlag(predSet, 0, MO_FLAG_PUSH); + I->eraseFromParent(); + break; + } case AMDGPU::JUMP: - if (isPredicated(I)) { - MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); - clearFlag(predSet, 0, MO_FLAG_PUSH); - } I->eraseFromParent(); break; } @@ -356,6 +414,8 @@ R600InstrInfo::isPredicable(MachineInstr *MI) const { if (MI->getOpcode() == AMDGPU::KILLGT) { return false; + } else if (isVector(*MI)) { + return false; } else { return AMDGPUInstrInfo::isPredicable(MI); } diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c..bf9569e 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 8242df9..8c50d54 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -512,8 +512,8 @@ def INTERP_PAIR_ZW : AMDGPUShaderInst < []>; def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS", - SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, - [SDNPMayLoad] + SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, + [SDNPVariadic] >; //===----------------------------------------------------------------------===// @@ -1090,12 +1090,12 @@ class COS_Common <bits<11> inst> : R600_1OP < multiclass DIV_Common <InstR600 recip_ieee> { def : Pat< (int_AMDGPU_div R600_Reg32:$src0, R600_Reg32:$src1), - (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) >; def : Pat< (fdiv R600_Reg32:$src0, R600_Reg32:$src1), - (MUL R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) + (MUL_IEEE R600_Reg32:$src0, (recip_ieee R600_Reg32:$src1)) >; } @@ -1169,12 +1169,12 @@ let Predicates = [isR600] in { // cards. 
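The fitsConstReadLimitations()/canBundle() declarations added to R600InstrInfo.h above encode the hardware rule that a single ALU instruction group may read constants from at most two constant-pair slots. A simplified standalone model of that check (illustrative only; it drops the MachineInstr plumbing and the real code's treatment of constant index 0):

// const_read_limit_model.cpp -- simplified sketch, not the backend code.
#include <cassert>
#include <set>
#include <vector>

// Each constant read is reduced to a "half pair" key: the 128-bit pair index
// (addr & ~3) combined with which 64-bit half of it is used (addr & 2). An
// ALU instruction group fits only if at most two distinct keys are read.
static bool fitsConstReadLimitations(const std::vector<unsigned> &Consts) {
  std::set<unsigned> HalfPairs;
  for (unsigned C : Consts)
    HalfPairs.insert((C & ~3u) | (C & 2u));
  return HalfPairs.size() <= 2;
}

int main() {
  assert(fitsConstReadLimitations({4, 5, 6, 7}));   // both halves of one pair
  assert(!fitsConstReadLimitations({4, 8, 12}));    // three distinct pairs
  return 0;
}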
class COS_PAT <InstR600 trig> : Pat< (fcos R600_Reg32:$src), - (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) >; class SIN_PAT <InstR600 trig> : Pat< (fsin R600_Reg32:$src), - (trig (MUL (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) + (trig (MUL_IEEE (MOV_IMM_I32 CONST.TWO_PI_INV), R600_Reg32:$src)) >; //===----------------------------------------------------------------------===// @@ -1587,19 +1587,28 @@ def PRED_X : InstR600 < (ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags), "", [], NullALU> { let FlagOperandIdx = 3; - let isTerminator = 1; } -let isTerminator = 1, isBranch = 1, isBarrier = 1 in { - -def JUMP : InstR600 <0x10, +let isTerminator = 1, isBranch = 1 in { +def JUMP_COND : InstR600 <0x10, (outs), - (ins brtarget:$target, R600_Pred:$p), + (ins brtarget:$target, R600_Predicate_Bit:$p), "JUMP $target ($p)", [], AnyALU >; -} // End isTerminator = 1, isBranch = 1, isBarrier = 1 +def JUMP : InstR600 <0x10, + (outs), + (ins brtarget:$target), + "JUMP $target", + [], AnyALU + > +{ + let isPredicable = 1; + let isBarrier = 1; +} + +} // End isTerminator = 1, isBranch = 1 let usesCustomInserter = 1 in { @@ -1639,7 +1648,7 @@ def FNEG_R600 : FNEG<R600_Reg32>; //===---------------------------------------------------------------------===// // Return instruction //===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, usesCustomInserter = 1 in { def RETURN : ILFormat<(outs), (ins variable_ops), "RETURN", [(IL_retflag)]>; @@ -1650,27 +1659,27 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, // Constant Buffer Addressing Support //===----------------------------------------------------------------------===// -let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { +let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in { def CONST_COPY : Instruction { let OutOperandList = (outs R600_Reg32:$dst); let InOperandList = (ins i32imm:$src); - let Pattern = [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; + let Pattern = + [(set R600_Reg32:$dst, (CONST_ADDRESS ADDRGA_CONST_OFFSET:$src))]; let AsmString = "CONST_COPY"; let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; let Itinerary = NullALU; } -} // end isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" +} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" def TEX_VTX_CONSTBUF : - InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr), "VTX_READ_eg $dst, $ptr", - [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr))]>, + InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", + [(set R600_Reg128:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, VTX_WORD1_GPR, VTX_WORD0 { let VC_INST = 0; let FETCH_TYPE = 2; let FETCH_WHOLE_QUAD = 0; - let BUFFER_ID = 0; let SRC_REL = 0; let SRC_SEL_X = 0; let DST_REL = 0; @@ -1840,6 +1849,18 @@ let isTerminator=1 in { // ISel Patterns //===----------------------------------------------------------------------===// +// CND*_INT Pattterns for f32 True / False values + +class CND_INT_f32 <InstR600 cnd, CondCode cc> : Pat < + (selectcc (i32 R600_Reg32:$src0), 0, (f32 R600_Reg32:$src1), + R600_Reg32:$src2, cc), + (cnd R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2) +>; + +def : CND_INT_f32 
<CNDE_INT, SETEQ>; +def : CND_INT_f32 <CNDGT_INT, SETGT>; +def : CND_INT_f32 <CNDGE_INT, SETGE>; + //CNDGE_INT extra pattern def : Pat < (selectcc (i32 R600_Reg32:$src0), -1, (i32 R600_Reg32:$src1), @@ -1958,8 +1979,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; // bitconvert patterns diff --git a/lib/Target/R600/R600LowerConstCopy.cpp b/lib/Target/R600/R600LowerConstCopy.cpp deleted file mode 100644 index 3ebe653..0000000 --- a/lib/Target/R600/R600LowerConstCopy.cpp +++ /dev/null @@ -1,222 +0,0 @@ -//===-- R600LowerConstCopy.cpp - Propagate ConstCopy / lower them to MOV---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// This pass is intended to handle remaining ConstCopy pseudo MachineInstr. -/// ISel will fold each Const Buffer read inside scalar ALU. However it cannot -/// fold them inside vector instruction, like DOT4 or Cube ; ISel emits -/// ConstCopy instead. This pass (executed after ExpandingSpecialInstr) will try -/// to fold them if possible or replace them by MOV otherwise. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "R600InstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/IR/GlobalValue.h" - -namespace llvm { - -class R600LowerConstCopy : public MachineFunctionPass { -private: - static char ID; - const R600InstrInfo *TII; - - struct ConstPairs { - unsigned XYPair; - unsigned ZWPair; - }; - - bool canFoldInBundle(ConstPairs &UsedConst, unsigned ReadConst) const; -public: - R600LowerConstCopy(TargetMachine &tm); - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "R600 Eliminate Symbolic Operand"; } -}; - -char R600LowerConstCopy::ID = 0; - -R600LowerConstCopy::R600LowerConstCopy(TargetMachine &tm) : - MachineFunctionPass(ID), - TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) -{ -} - -bool R600LowerConstCopy::canFoldInBundle(ConstPairs &UsedConst, - unsigned ReadConst) const { - unsigned ReadConstChan = ReadConst & 3; - unsigned ReadConstIndex = ReadConst & (~3); - if (ReadConstChan < 2) { - if (!UsedConst.XYPair) { - UsedConst.XYPair = ReadConstIndex; - } - return UsedConst.XYPair == ReadConstIndex; - } else { - if (!UsedConst.ZWPair) { - UsedConst.ZWPair = ReadConstIndex; - } - return UsedConst.ZWPair == ReadConstIndex; - } -} - -static bool isControlFlow(const MachineInstr &MI) { - return (MI.getOpcode() == AMDGPU::IF_PREDICATE_SET) || - (MI.getOpcode() == AMDGPU::ENDIF) || - (MI.getOpcode() == AMDGPU::ELSE) || - (MI.getOpcode() == AMDGPU::WHILELOOP) || - (MI.getOpcode() == AMDGPU::BREAK); -} - -bool R600LowerConstCopy::runOnMachineFunction(MachineFunction &MF) { - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - DenseMap<unsigned, 
MachineInstr *> RegToConstIndex; - for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(), - E = MBB.instr_end(); I != E;) { - - if (I->getOpcode() == AMDGPU::CONST_COPY) { - MachineInstr &MI = *I; - I = llvm::next(I); - unsigned DstReg = MI.getOperand(0).getReg(); - DenseMap<unsigned, MachineInstr *>::iterator SrcMI = - RegToConstIndex.find(DstReg); - if (SrcMI != RegToConstIndex.end()) { - SrcMI->second->eraseFromParent(); - RegToConstIndex.erase(SrcMI); - } - MachineInstr *NewMI = - TII->buildDefaultInstruction(MBB, &MI, AMDGPU::MOV, - MI.getOperand(0).getReg(), AMDGPU::ALU_CONST); - TII->setImmOperand(NewMI, R600Operands::SRC0_SEL, - MI.getOperand(1).getImm()); - RegToConstIndex[DstReg] = NewMI; - MI.eraseFromParent(); - continue; - } - - std::vector<unsigned> Defs; - // We consider all Instructions as bundled because algorithm that handle - // const read port limitations inside an IG is still valid with single - // instructions. - std::vector<MachineInstr *> Bundle; - - if (I->isBundle()) { - unsigned BundleSize = I->getBundleSize(); - for (unsigned i = 0; i < BundleSize; i++) { - I = llvm::next(I); - Bundle.push_back(I); - } - } else if (TII->isALUInstr(I->getOpcode())){ - Bundle.push_back(I); - } else if (isControlFlow(*I)) { - RegToConstIndex.clear(); - I = llvm::next(I); - continue; - } else { - MachineInstr &MI = *I; - for (MachineInstr::mop_iterator MOp = MI.operands_begin(), - MOpE = MI.operands_end(); MOp != MOpE; ++MOp) { - MachineOperand &MO = *MOp; - if (!MO.isReg()) - continue; - if (MO.isDef()) { - Defs.push_back(MO.getReg()); - } else { - // Either a TEX or an Export inst, prevent from erasing def of used - // operand - RegToConstIndex.erase(MO.getReg()); - for (MCSubRegIterator SR(MO.getReg(), &TII->getRegisterInfo()); - SR.isValid(); ++SR) { - RegToConstIndex.erase(*SR); - } - } - } - } - - - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - for(std::vector<MachineInstr *>::iterator It = Bundle.begin(), - ItE = Bundle.end(); It != ItE; ++It) { - MachineInstr *MI = *It; - if (TII->isPredicated(MI)) { - // We don't want to erase previous assignment - RegToConstIndex.erase(MI->getOperand(0).getReg()); - } else { - int WriteIDX = TII->getOperandIdx(MI->getOpcode(), R600Operands::WRITE); - if (WriteIDX < 0 || MI->getOperand(WriteIDX).getImm()) - Defs.push_back(MI->getOperand(0).getReg()); - } - } - - ConstPairs CP = {0,0}; - for (unsigned SrcOp = 0; SrcOp < 3; SrcOp++) { - for(std::vector<MachineInstr *>::iterator It = Bundle.begin(), - ItE = Bundle.end(); It != ItE; ++It) { - MachineInstr *MI = *It; - int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[SrcOp][0]); - if (SrcIdx < 0) - continue; - MachineOperand &MO = MI->getOperand(SrcIdx); - DenseMap<unsigned, MachineInstr *>::iterator SrcMI = - RegToConstIndex.find(MO.getReg()); - if (SrcMI != RegToConstIndex.end()) { - MachineInstr *CstMov = SrcMI->second; - int ConstMovSel = - TII->getOperandIdx(CstMov->getOpcode(), R600Operands::SRC0_SEL); - unsigned ConstIndex = CstMov->getOperand(ConstMovSel).getImm(); - if (MI->isInsideBundle() && canFoldInBundle(CP, ConstIndex)) { - TII->setImmOperand(MI, OpTable[SrcOp][1], ConstIndex); - MI->getOperand(SrcIdx).setReg(AMDGPU::ALU_CONST); - } else { - RegToConstIndex.erase(SrcMI); - } - } - } - } - - for (std::vector<unsigned>::iterator It = Defs.begin(), ItE = Defs.end(); - It != ItE; ++It) { - DenseMap<unsigned, MachineInstr 
*>::iterator SrcMI = - RegToConstIndex.find(*It); - if (SrcMI != RegToConstIndex.end()) { - SrcMI->second->eraseFromParent(); - RegToConstIndex.erase(SrcMI); - } - } - I = llvm::next(I); - } - - if (MBB.succ_empty()) { - for (DenseMap<unsigned, MachineInstr *>::iterator - DI = RegToConstIndex.begin(), DE = RegToConstIndex.end(); - DI != DE; ++DI) { - DI->second->eraseFromParent(); - } - } - } - return false; -} - -FunctionPass *createR600LowerConstCopy(TargetMachine &tm) { - return new R600LowerConstCopy(tm); -} - -} - - diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 40aec83..b07a585 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -14,5 +14,4 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo() { - memset(Outputs, 0, sizeof(Outputs)); } diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 4b901f4..13a46b8 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -26,7 +26,6 @@ public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; - SDNode *Outputs[16]; }; } // End llvm namespace diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp new file mode 100644 index 0000000..9074364 --- /dev/null +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -0,0 +1,427 @@ +//===-- R600MachineScheduler.cpp - R600 Scheduler Interface -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 Machine Scheduler interface +// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "misched" + +#include "R600MachineScheduler.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/Support/raw_ostream.h" +#include <set> + +using namespace llvm; + +void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { + + DAG = dag; + TII = static_cast<const R600InstrInfo*>(DAG->TII); + TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); + MRI = &DAG->MRI; + Available[IDAlu]->clear(); + Available[IDFetch]->clear(); + Available[IDOther]->clear(); + CurInstKind = IDOther; + CurEmitted = 0; + OccupedSlotsMask = 15; + InstKindLimit[IDAlu] = 120; // 120 minus 8 for security + + + const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) { + InstKindLimit[IDFetch] = 7; // 8 minus 1 for security + } else { + InstKindLimit[IDFetch] = 15; // 16 minus 1 for security + } +} + +void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst) +{ + if (QSrc->empty()) + return; + for (ReadyQueue::iterator I = QSrc->begin(), + E = QSrc->end(); I != E; ++I) { + (*I)->NodeQueueId &= ~QSrc->getID(); + QDst->push(*I); + } + QSrc->clear(); +} + +SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { + SUnit *SU = 0; + IsTopNode = true; + NextInstKind = IDOther; + + // check if we might want to switch current clause type + bool AllowSwitchToAlu = (CurInstKind == IDOther) || + (CurEmitted > InstKindLimit[CurInstKind]) || + (Available[CurInstKind]->empty()); + bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) && + (!Available[IDFetch]->empty() || !Available[IDOther]->empty()); + + if ((AllowSwitchToAlu && CurInstKind != IDAlu) || + (!AllowSwitchFromAlu && CurInstKind == IDAlu)) { + // try to pick ALU + SU = pickAlu(); + if (SU) { + if (CurEmitted > InstKindLimit[IDAlu]) + CurEmitted = 0; + NextInstKind = IDAlu; + } + } + + if (!SU) { + // try to pick FETCH + SU = pickOther(IDFetch); + if (SU) + NextInstKind = IDFetch; + } + + // try to pick other + if (!SU) { + SU = pickOther(IDOther); + if (SU) + NextInstKind = IDOther; + } + + DEBUG( + if (SU) { + dbgs() << "picked node: "; + SU->dump(DAG); + } else { + dbgs() << "NO NODE "; + for (int i = 0; i < IDLast; ++i) { + Available[i]->dump(); + Pending[i]->dump(); + } + for (unsigned i = 0; i < DAG->SUnits.size(); i++) { + const SUnit &S = DAG->SUnits[i]; + if (!S.isScheduled) + S.dump(DAG); + } + } + ); + + return SU; +} + +void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + + DEBUG(dbgs() << "scheduled: "); + DEBUG(SU->dump(DAG)); + + if (NextInstKind != CurInstKind) { + DEBUG(dbgs() << "Instruction Type Switch\n"); + if (NextInstKind != IDAlu) + OccupedSlotsMask = 15; + CurEmitted = 0; + CurInstKind = NextInstKind; + } + + if (CurInstKind == IDAlu) { + switch (getAluKind(SU)) { + case AluT_XYZW: + CurEmitted += 4; + break; + case AluDiscarded: + break; + default: { + ++CurEmitted; + for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(), + E = SU->getInstr()->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + ++CurEmitted; + } 
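The clause-switch test in pickNode() above is the core heuristic: stay in the current clause until it fills up or runs out of candidates, and only leave an ALU clause when something else is actually ready. A small standalone model of that decision (illustrative only; ClauseState and shouldPickAlu are assumed names, not scheduler code):

// clause_switch_model.cpp -- illustrative sketch, not the scheduler itself.
#include <cassert>

enum InstKind { IDAlu, IDFetch, IDOther };

struct ClauseState {
  InstKind Cur;        // kind of clause currently being emitted
  int Emitted;         // instructions emitted in the current clause
  int Limit;           // InstKindLimit[Cur]
  bool CurQueueEmpty;  // no more candidates of the current kind
  bool OtherReady;     // fetch/other candidates are available
};

// Switch to ALU when we are between clauses, the current clause is full, or
// it has run out of candidates; leave an ALU clause only when it is full and
// something else is actually ready to run.
static bool shouldPickAlu(const ClauseState &S) {
  bool AllowSwitchToAlu = (S.Cur == IDOther) || (S.Emitted > S.Limit) ||
                          S.CurQueueEmpty;
  bool AllowSwitchFromAlu = (S.Emitted > S.Limit) && S.OtherReady;
  return (AllowSwitchToAlu && S.Cur != IDAlu) ||
         (!AllowSwitchFromAlu && S.Cur == IDAlu);
}

int main() {
  // Mid ALU clause, not full: keep picking ALU even if a fetch is ready.
  assert(shouldPickAlu({IDAlu, 40, 120, false, true}));
  // ALU clause over its limit and a fetch is ready: switch away from ALU.
  assert(!shouldPickAlu({IDAlu, 121, 120, false, true}));
  return 0;
}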
+      }
+    }
+  } else {
+    ++CurEmitted;
+  }
+
+
+  DEBUG(dbgs() << CurEmitted << " Instructions Emitted in this clause\n");
+
+  if (CurInstKind != IDFetch) {
+    MoveUnits(Pending[IDFetch], Available[IDFetch]);
+  }
+  MoveUnits(Pending[IDOther], Available[IDOther]);
+}
+
+void R600SchedStrategy::releaseTopNode(SUnit *SU) {
+  int IK = getInstKind(SU);
+
+  DEBUG(dbgs() << IK << " <= ");
+  DEBUG(SU->dump(DAG));
+
+  Pending[IK]->push(SU);
+}
+
+void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
+}
+
+bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
+                                          const TargetRegisterClass *RC) const {
+  if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+    return RC->contains(Reg);
+  } else {
+    return MRI->getRegClass(Reg) == RC;
+  }
+}
+
+R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
+  MachineInstr *MI = SU->getInstr();
+
+  switch (MI->getOpcode()) {
+  case AMDGPU::INTERP_PAIR_XY:
+  case AMDGPU::INTERP_PAIR_ZW:
+  case AMDGPU::INTERP_VEC_LOAD:
+    return AluT_XYZW;
+  case AMDGPU::COPY:
+    if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+      // %vregX = COPY Tn_X is likely to be discarded in favor of an
+      // assignment of Tn_X to %vregX, don't consider it in scheduling
+      return AluDiscarded;
+    }
+    else if (MI->getOperand(1).isUndef()) {
+      // MI will become a KILL, don't consider it in scheduling
+      return AluDiscarded;
+    }
+  default:
+    break;
+  }
+
+  // Does the instruction take a whole IG?
+  if(TII->isVector(*MI) ||
+      TII->isCubeOp(MI->getOpcode()) ||
+      TII->isReductionOp(MI->getOpcode()))
+    return AluT_XYZW;
+
+  // Is the result already assigned to a channel?
+  unsigned DestSubReg = MI->getOperand(0).getSubReg();
+  switch (DestSubReg) {
+  case AMDGPU::sub0:
+    return AluT_X;
+  case AMDGPU::sub1:
+    return AluT_Y;
+  case AMDGPU::sub2:
+    return AluT_Z;
+  case AMDGPU::sub3:
+    return AluT_W;
+  default:
+    break;
+  }
+
+  // Is the result already a member of an X/Y/Z/W class?
+ unsigned DestReg = MI->getOperand(0).getReg(); + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || + regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) + return AluT_X; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) + return AluT_Y; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) + return AluT_Z; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) + return AluT_W; + if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) + return AluT_XYZW; + + return AluAny; + +} + +int R600SchedStrategy::getInstKind(SUnit* SU) { + int Opcode = SU->getInstr()->getOpcode(); + + if (TII->isALUInstr(Opcode)) { + return IDAlu; + } + + switch (Opcode) { + case AMDGPU::COPY: + case AMDGPU::CONST_COPY: + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::DOT4_eg_pseudo: + case AMDGPU::DOT4_r600_pseudo: + return IDAlu; + case AMDGPU::TEX_VTX_CONSTBUF: + case AMDGPU::TEX_VTX_TEXBUF: + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TXD: + case AMDGPU::TXD_SHADOW: + return IDFetch; + default: + DEBUG( + dbgs() << "other inst: "; + SU->dump(DAG); + ); + return IDOther; + } +} + +SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { + if (Q.empty()) + return NULL; + for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); + It != E; ++It) { + SUnit *SU = *It; + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); + Q.erase(It); + return SU; + } else { + InstructionsGroupCandidate.pop_back(); + } + } + return NULL; +} + +void R600SchedStrategy::LoadAlu() { + ReadyQueue *QSrc = Pending[IDAlu]; + for (ReadyQueue::iterator I = QSrc->begin(), + E = QSrc->end(); I != E; ++I) { + (*I)->NodeQueueId &= ~QSrc->getID(); + AluKind AK = getAluKind(*I); + AvailableAlus[AK].insert(*I); + } + QSrc->clear(); +} + +void R600SchedStrategy::PrepareNextSlot() { + DEBUG(dbgs() << "New Slot\n"); + assert (OccupedSlotsMask && "Slot wasn't filled"); + OccupedSlotsMask = 0; + InstructionsGroupCandidate.clear(); + LoadAlu(); +} + +void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) { + unsigned DestReg = MI->getOperand(0).getReg(); + // PressureRegister crashes if an operand is def and used in the same inst + // and we try to constraint its regclass + for (MachineInstr::mop_iterator It = MI->operands_begin(), + E = MI->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && !MO.isDef() && + MO.getReg() == MI->getOperand(0).getReg()) + return; + } + // Constrains the regclass of DestReg to assign it to Slot + switch (Slot) { + case 0: + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass); + break; + case 1: + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass); + break; + case 2: + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass); + break; + case 3: + MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass); + break; + } +} + +SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) { + 
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; + SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]); + SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]); + if (!UnslotedSU) { + return SlotedSU; + } else if (!SlotedSU) { + AssignSlot(UnslotedSU->getInstr(), Slot); + return UnslotedSU; + } else { + //Determine which one to pick (the lesser one) + if (CompareSUnit()(SlotedSU, UnslotedSU)) { + AvailableAlus[AluAny].insert(UnslotedSU); + return SlotedSU; + } else { + AvailableAlus[IndexToID[Slot]].insert(SlotedSU); + AssignSlot(UnslotedSU->getInstr(), Slot); + return UnslotedSU; + } + } +} + +bool R600SchedStrategy::isAvailablesAluEmpty() const { + return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() && + AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() && + AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() && + AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty(); +} + +SUnit* R600SchedStrategy::pickAlu() { + while (!isAvailablesAluEmpty()) { + if (!OccupedSlotsMask) { + // Flush physical reg copies (RA will discard them) + if (!AvailableAlus[AluDiscarded].empty()) { + OccupedSlotsMask = 15; + return PopInst(AvailableAlus[AluDiscarded]); + } + // If there is a T_XYZW alu available, use it + if (!AvailableAlus[AluT_XYZW].empty()) { + OccupedSlotsMask = 15; + return PopInst(AvailableAlus[AluT_XYZW]); + } + } + for (unsigned Chan = 0; Chan < 4; ++Chan) { + bool isOccupied = OccupedSlotsMask & (1 << Chan); + if (!isOccupied) { + SUnit *SU = AttemptFillSlot(Chan); + if (SU) { + OccupedSlotsMask |= (1 << Chan); + InstructionsGroupCandidate.push_back(SU->getInstr()); + return SU; + } + } + } + PrepareNextSlot(); + } + return NULL; +} + +SUnit* R600SchedStrategy::pickOther(int QID) { + SUnit *SU = 0; + ReadyQueue *AQ = Available[QID]; + + if (AQ->empty()) { + MoveUnits(Pending[QID], AQ); + } + if (!AQ->empty()) { + SU = *AQ->begin(); + AQ->remove(AQ->begin()); + } + return SU; +} + diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h new file mode 100644 index 0000000..3d0367f --- /dev/null +++ b/lib/Target/R600/R600MachineScheduler.h @@ -0,0 +1,120 @@ +//===-- R600MachineScheduler.h - R600 Scheduler Interface -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief R600 Machine Scheduler interface +// +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINESCHEDULER_H_ +#define R600MACHINESCHEDULER_H_ + +#include "R600InstrInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/PriorityQueue.h" + +using namespace llvm; + +namespace llvm { + +class CompareSUnit { +public: + bool operator()(const SUnit *S1, const SUnit *S2) { + return S1->getDepth() > S2->getDepth(); + } +}; + +class R600SchedStrategy : public MachineSchedStrategy { + + const ScheduleDAGMI *DAG; + const R600InstrInfo *TII; + const R600RegisterInfo *TRI; + MachineRegisterInfo *MRI; + + enum InstQueue { + QAlu = 1, + QFetch = 2, + QOther = 4 + }; + + enum InstKind { + IDAlu, + IDFetch, + IDOther, + IDLast + }; + + enum AluKind { + AluAny, + AluT_X, + AluT_Y, + AluT_Z, + AluT_W, + AluT_XYZW, + AluDiscarded, // LLVM Instructions that are going to be eliminated + AluLast + }; + + ReadyQueue *Available[IDLast], *Pending[IDLast]; + std::multiset<SUnit *, CompareSUnit> AvailableAlus[AluLast]; + + InstKind CurInstKind; + int CurEmitted; + InstKind NextInstKind; + + int InstKindLimit[IDLast]; + + int OccupedSlotsMask; + +public: + R600SchedStrategy() : + DAG(0), TII(0), TRI(0), MRI(0) { + Available[IDAlu] = new ReadyQueue(QAlu, "AAlu"); + Available[IDFetch] = new ReadyQueue(QFetch, "AFetch"); + Available[IDOther] = new ReadyQueue(QOther, "AOther"); + Pending[IDAlu] = new ReadyQueue(QAlu<<4, "PAlu"); + Pending[IDFetch] = new ReadyQueue(QFetch<<4, "PFetch"); + Pending[IDOther] = new ReadyQueue(QOther<<4, "POther"); + } + + virtual ~R600SchedStrategy() { + for (unsigned I = 0; I < IDLast; ++I) { + delete Available[I]; + delete Pending[I]; + } + } + + virtual void initialize(ScheduleDAGMI *dag); + virtual SUnit *pickNode(bool &IsTopNode); + virtual void schedNode(SUnit *SU, bool IsTopNode); + virtual void releaseTopNode(SUnit *SU); + virtual void releaseBottomNode(SUnit *SU); + +private: + std::vector<MachineInstr *> InstructionsGroupCandidate; + + int getInstKind(SUnit *SU); + bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; + AluKind getAluKind(SUnit *SU) const; + void LoadAlu(); + bool isAvailablesAluEmpty() const; + SUnit *AttemptFillSlot (unsigned Slot); + void PrepareNextSlot(); + SUnit *PopInst(std::multiset<SUnit *, CompareSUnit> &Q); + + void AssignSlot(MachineInstr *MI, unsigned Slot); + SUnit* pickAlu(); + SUnit* pickOther(int QID); + void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); +}; + +} // namespace llvm + +#endif /* R600MACHINESCHEDULER_H_ */ diff --git a/lib/Target/R600/SIAssignInterpRegs.cpp b/lib/Target/R600/SIAssignInterpRegs.cpp deleted file mode 100644 index 832e44d..0000000 --- a/lib/Target/R600/SIAssignInterpRegs.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// \brief This pass maps the pseudo interpolation registers to the correct physical -/// registers. -// -/// Prior to executing a fragment shader, the GPU loads interpolation -/// parameters into physical registers. 
The specific physical register that each -/// interpolation parameter ends up in depends on the type of the interpolation -/// parameter as well as how many interpolation parameters are used by the -/// shader. -// -//===----------------------------------------------------------------------===// - - - -#include "AMDGPU.h" -#include "AMDIL.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - -class SIAssignInterpRegsPass : public MachineFunctionPass { - -private: - static char ID; - TargetMachine &TM; - - void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, - unsigned physReg, unsigned virtReg); - -public: - SIAssignInterpRegsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - virtual bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "SI Assign intrpolation registers"; } -}; - -} // End anonymous namespace - -char SIAssignInterpRegsPass::ID = 0; - -#define INTERP_VALUES 16 -#define REQUIRED_VALUE_MAX_INDEX 7 - -struct InterpInfo { - bool Enabled; - unsigned Regs[3]; - unsigned RegCount; -}; - - -FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) { - return new SIAssignInterpRegsPass(tm); -} - -bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) { - - struct InterpInfo InterpUse[INTERP_VALUES] = { - {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2}, - {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2}, - {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2}, - {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3}, - {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2}, - {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2}, - {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2}, - {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1}, - {false, {AMDGPU::POS_X_FLOAT}, 1}, - {false, {AMDGPU::POS_Y_FLOAT}, 1}, - {false, {AMDGPU::POS_Z_FLOAT}, 1}, - {false, {AMDGPU::POS_W_FLOAT}, 1}, - {false, {AMDGPU::FRONT_FACE}, 1}, - {false, {AMDGPU::ANCILLARY}, 1}, - {false, {AMDGPU::SAMPLE_COVERAGE}, 1}, - {false, {AMDGPU::POS_FIXED_PT}, 1} - }; - - SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>(); - // This pass is only needed for pixel shaders. - if (MFI->ShaderType != ShaderType::PIXEL) { - return false; - } - MachineRegisterInfo &MRI = MF.getRegInfo(); - bool ForceEnable = true; - - // First pass, mark the interpolation values that are used. - for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { - for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; - RegIdx++) { - InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled || - !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]); - if (InterpUse[InterpIdx].Enabled && - InterpIdx <= REQUIRED_VALUE_MAX_INDEX) { - ForceEnable = false; - } - } - } - - // At least one interpolation mode must be enabled or else the GPU will hang. - if (ForceEnable) { - InterpUse[0].Enabled = true; - } - - unsigned UsedVgprs = 0; - - // Second pass, replace with VGPRs. 
- for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) { - if (!InterpUse[InterpIdx].Enabled) { - continue; - } - MFI->SPIPSInputAddr |= (1 << InterpIdx); - - for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount; - RegIdx++, UsedVgprs++) { - unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs); - unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); - MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg); - addLiveIn(&MF, MRI, NewReg, VirtReg); - } - } - - return false; -} - -void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF, - MachineRegisterInfo & MRI, - unsigned physReg, unsigned virtReg) { - const TargetInstrInfo * TII = TM.getInstrInfo(); - if (!MRI.isLiveIn(physReg)) { - MRI.addLiveIn(physReg, virtReg); - MF->front().addLiveIn(physReg); - BuildMI(MF->front(), MF->front().begin(), DebugLoc(), - TII->get(TargetOpcode::COPY), virtReg) - .addReg(physReg); - } else { - MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg)); - } -} diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0a0fbd9..93f8c38 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -14,10 +14,13 @@ #include "SIISelLowering.h" #include "AMDIL.h" +#include "AMDGPU.h" #include "AMDILIntrinsicInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -28,30 +31,41 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())), TRI(TM.getRegisterInfo()) { - addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); + + addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); + addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); + + addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); + addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); + addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass); computeRegisterProperties(); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); - setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - // We need to custom lower loads from the USER_SGPR address space, so we can - // add the SGPRs as livein registers. 
-  setOperationAction(ISD::LOAD, MVT::i32, Custom);
-  setOperationAction(ISD::LOAD, MVT::i64, Custom);
-
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -59,6 +73,137 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setTargetDAGCombine(ISD::SELECT_CC);
 
   setTargetDAGCombine(ISD::SETCC);
+
+  setSchedulingPreference(Sched::Source);
+}
+
+SDValue SITargetLowering::LowerFormalArguments(
+                                      SDValue Chain,
+                                      CallingConv::ID CallConv,
+                                      bool isVarArg,
+                                      const SmallVectorImpl<ISD::InputArg> &Ins,
+                                      DebugLoc DL, SelectionDAG &DAG,
+                                      SmallVectorImpl<SDValue> &InVals) const {
+
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  FunctionType *FType = MF.getFunction()->getFunctionType();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+  assert(CallConv == CallingConv::C);
+
+  SmallVector<ISD::InputArg, 16> Splits;
+  uint32_t Skipped = 0;
+
+  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
+    const ISD::InputArg &Arg = Ins[i];
+
+    // First, check if it's a PS input addr
+    if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
+
+      assert((PSInputNum <= 15) && "Too many PS inputs!");
+
+      if (!Arg.Used) {
+        // We can safely skip PS inputs
+        Skipped |= 1 << i;
+        ++PSInputNum;
+        continue;
+      }
+
+      Info->PSInputAddr |= 1 << PSInputNum++;
+    }
+
+    // Second, split vertices into their elements
+    if (Arg.VT.isVector()) {
+      ISD::InputArg NewArg = Arg;
+      NewArg.Flags.setSplit();
+      NewArg.VT = Arg.VT.getVectorElementType();
+
+      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+      // three or five element vertex only needs three or five registers,
+      // NOT four or eight.
+      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
+      unsigned NumElements = ParamType->getVectorNumElements();
+
+      for (unsigned j = 0; j != NumElements; ++j) {
+        Splits.push_back(NewArg);
+        NewArg.PartOffset += NewArg.VT.getStoreSize();
+      }
+
+    } else {
+      Splits.push_back(Arg);
+    }
+  }
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  // At least one interpolation mode must be enabled or else the GPU will hang.
+ if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) { + Info->PSInputAddr |= 1; + CCInfo.AllocateReg(AMDGPU::VGPR0); + CCInfo.AllocateReg(AMDGPU::VGPR1); + } + + AnalyzeFormalArguments(CCInfo, Splits); + + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { + + if (Skipped & (1 << i)) { + InVals.push_back(SDValue()); + continue; + } + + CCValAssign &VA = ArgLocs[ArgIdx++]; + assert(VA.isRegLoc() && "Parameter must be in a register!"); + + unsigned Reg = VA.getLocReg(); + MVT VT = VA.getLocVT(); + + if (VT == MVT::i64) { + // For now assume it is a pointer + Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, + &AMDGPU::SReg_64RegClass); + Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); + InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + continue; + } + + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); + + Reg = MF.addLiveIn(Reg, RC); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); + + const ISD::InputArg &Arg = Ins[i]; + if (Arg.VT.isVector()) { + + // Build a vector from the registers + Type *ParamType = FType->getParamType(Arg.OrigArgIndex); + unsigned NumElements = ParamType->getVectorNumElements(); + + SmallVector<SDValue, 4> Regs; + Regs.push_back(Val); + for (unsigned j = 1; j != NumElements; ++j) { + Reg = ArgLocs[ArgIdx++].getLocReg(); + Reg = MF.addLiveIn(Reg, RC); + Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + } + + // Fill up the missing vector elements + NumElements = Arg.VT.getVectorNumElements() - NumElements; + for (unsigned j = 0; j != NumElements; ++j) + Regs.push_back(DAG.getUNDEF(VT)); + + InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, + Regs.data(), Regs.size())); + continue; + } + + InVals.push_back(Val); + } + return Chain; } MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( @@ -70,15 +215,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::SHADER_TYPE: - BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType = - MI->getOperand(0).getImm(); - MI->eraseFromParent(); - break; - - case AMDGPU::SI_INTERP: - LowerSI_INTERP(MI, *BB, I, MRI); - break; case AMDGPU::SI_WQM: LowerSI_WQM(MI, *BB, I, MRI); break; @@ -94,41 +230,14 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MI->eraseFromParent(); } -void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); - unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); - MachineOperand dst = MI->getOperand(0); - MachineOperand iReg = MI->getOperand(1); - MachineOperand jReg = MI->getOperand(2); - MachineOperand attr_chan = MI->getOperand(3); - MachineOperand attr = MI->getOperand(4); - MachineOperand params = MI->getOperand(5); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) - .addOperand(params); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) - .addOperand(iReg) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) - .addOperand(dst) - .addReg(tmp) - .addOperand(jReg) - .addOperand(attr_chan) - .addOperand(attr) - .addReg(M0); - - MI->eraseFromParent(); -} - EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } 
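LowerFormalArguments above allocates one register per source vector element and, when rebuilding the value, pads it with undef lanes up to the IR vector type's width. A standalone sketch of that padding step (illustrative only; Lane and rebuildVector are made-up stand-ins for SDValue and the BUILD_VECTOR construction, not backend code):

// formal_arg_padding_model.cpp -- illustrative sketch, not the backend code.
#include <cassert>
#include <vector>

struct Lane { float Val; bool IsUndef; };

// A vector argument declared as, say, <4 x float> but sourced from a three
// element vertex only consumes three registers; the remaining lanes are
// filled with undef when the vector is rebuilt.
static std::vector<Lane> rebuildVector(const std::vector<float> &Regs,
                                       unsigned IRNumElements) {
  std::vector<Lane> Lanes;
  for (float R : Regs)
    Lanes.push_back({R, false});     // lanes that arrived in registers
  while (Lanes.size() < IRNumElements)
    Lanes.push_back({0.0f, true});   // missing lanes become undef
  return Lanes;
}

int main() {
  std::vector<Lane> V = rebuildVector({1.0f, 2.0f, 3.0f}, 4);
  assert(V.size() == 4 && !V[2].IsUndef && V[3].IsUndef);
  return 0;
}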
+MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { + return MVT::i32; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// @@ -137,20 +246,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntrinsicID = - cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); - switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_vs_load_buffer_index: - return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, - AMDGPU::VGPR0, VT); - default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); - } - break; - } } return SDValue(); } @@ -249,47 +345,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, return Chain; } -SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op); - - assert(Ptr); - - unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace(); - - // We only need to lower USER_SGPR address space loads - if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) { - return SDValue(); - } - - // Loads from the USER_SGPR address space can only have constant value - // pointers. - ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr()); - assert(BasePtr); - - unsigned TypeDwordWidth = VT.getSizeInBits() / 32; - const TargetRegisterClass * dstClass; - switch (TypeDwordWidth) { - default: - assert(!"USER_SGPR value size not implemented"); - return SDValue(); - case 1: - dstClass = &AMDGPU::SReg_32RegClass; - break; - case 2: - dstClass = &AMDGPU::SReg_64RegClass; - break; - } - uint64_t Index = BasePtr->getZExtValue(); - assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned"); - unsigned SGPRIndex = Index / TypeDwordWidth; - unsigned Reg = dstClass->getRegister(SGPRIndex); - - DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg, - VT)); - return SDValue(); -} - SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 737162f..d656225 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -24,14 +24,9 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; - void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, unsigned Opocde) const; - void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -43,9 +38,17 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const 
SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; + virtual MVT getScalarShiftAmountTy(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 24fc929..98bd3db 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -88,6 +88,9 @@ private: MachineBasicBlock::iterator I, const Counters &Counts); + /// \brief Do we need def2def checks? + bool unorderedDefines(MachineInstr &MI); + /// \brief Resolve all operand dependencies to counter requirements Counters handleOperands(MachineInstr &MI); @@ -125,7 +128,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // Only consider stores or EXP for EXP_CNT Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && - (MI.getOpcode() == AMDGPU::EXP || !MI.getDesc().mayStore())); + (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { @@ -311,8 +314,10 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { RegInterval Interval = getRegInterval(Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { - if (Op.isDef()) + if (Op.isDef()) { increaseCounters(Result, UsedRegs[j]); + increaseCounters(Result, DefinedRegs[j]); + } if (Op.isUse()) increaseCounters(Result, DefinedRegs[j]); diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index fe417d6..3891ddb 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -129,12 +129,12 @@ class SMRD <bits<5> op, bits<1> imm, dag outs, dag ins, string asm, list<dag> pattern> : Enc32<outs, ins, asm, pattern> { bits<7> SDST; - bits<6> SBASE; + bits<7> SBASE; bits<8> OFFSET; let Inst{7-0} = OFFSET; let Inst{8} = imm; - let Inst{14-9} = SBASE; + let Inst{14-9} = SBASE{6-1}; let Inst{21-15} = SDST; let Inst{26-22} = op; let Inst{31-27} = 0x18; //encoding @@ -292,7 +292,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : bits<1> ADDR64; bits<1> LDS; bits<8> VADDR; - bits<5> SRSRC; + bits<7> SRSRC; bits<1> SLC; bits<1> TFE; bits<8> SOFFSET; @@ -307,7 +307,7 @@ class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{31-26} = 0x38; //encoding let Inst{39-32} = VADDR; let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; + let Inst{52-48} = SRSRC{6-2}; let Inst{54} = SLC; let Inst{55} = TFE; let Inst{63-56} = SOFFSET; @@ -330,7 +330,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : bits<4> DFMT; bits<3> NFMT; bits<8> VADDR; - bits<5> SRSRC; + bits<7> SRSRC; bits<1> SLC; bits<1> TFE; bits<8> SOFFSET; @@ -346,7 +346,7 @@ class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{31-26} = 0x3a; //encoding let Inst{39-32} = VADDR; let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; + let Inst{52-48} = SRSRC{6-2}; let Inst{54} = SLC; let Inst{55} = TFE; let Inst{63-56} = SOFFSET; @@ -370,8 +370,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : bits<1> LWE; bits<1> SLC; bits<8> VADDR; - bits<5> SRSRC; - bits<5> 
SSAMP; + bits<7> SRSRC; + bits<7> SSAMP; let Inst{11-8} = DMASK; let Inst{12} = UNORM; @@ -385,8 +385,8 @@ class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{31-26} = 0x3c; let Inst{39-32} = VADDR; let Inst{47-40} = VDATA; - let Inst{52-48} = SRSRC; - let Inst{57-53} = SSAMP; + let Inst{52-48} = SRSRC{6-2}; + let Inst{57-53} = SSAMP{6-2}; let VM_CNT = 1; let EXP_CNT = 1; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index d9dbd6a..de2373b 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -33,36 +33,108 @@ const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const { void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // If we are trying to copy to or from SCC, there is a bug somewhere else in // the backend. While it may be theoretically possible to do this, it should // never be necessary. assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); - if (AMDGPU::VReg_64RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || - AMDGPU::SReg_64RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub0)) - .addReg(RI.getSubReg(SrcReg, AMDGPU::sub0), getKillRegState(KillSrc)) - .addReg(DestReg, RegState::Define | RegState::Implicit); - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), RI.getSubReg(DestReg, AMDGPU::sub1)) - .addReg(RI.getSubReg(SrcReg, AMDGPU::sub1), getKillRegState(KillSrc)); + const int16_t Sub0_15[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 + }; + + const int16_t Sub0_7[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 + }; + + const int16_t Sub0_3[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 + }; + + const int16_t Sub0_1[] = { + AMDGPU::sub0, AMDGPU::sub1, 0 + }; + + unsigned Opcode; + const int16_t *SubIndices; + + if (AMDGPU::SReg_32RegClass.contains(DestReg)) { + assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); + return; + + } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { + assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); + Opcode = AMDGPU::S_MOV_B32; + SubIndices = Sub0_3; + + } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { + assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); + Opcode = AMDGPU::S_MOV_B32; + SubIndices = Sub0_7; + + } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { + assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); + Opcode = AMDGPU::S_MOV_B32; + SubIndices = Sub0_15; + } else if (AMDGPU::VReg_32RegClass.contains(DestReg)) { assert(AMDGPU::VReg_32RegClass.contains(SrcReg) || - AMDGPU::SReg_32RegClass.contains(SrcReg)); + AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 
.addReg(SrcReg, getKillRegState(KillSrc)); + return; + + } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || + AMDGPU::SReg_64RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_1; + + } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || + AMDGPU::SReg_128RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_3; + + } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || + AMDGPU::SReg_256RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_7; + + } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { + assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || + AMDGPU::SReg_512RegClass.contains(SrcReg)); + Opcode = AMDGPU::V_MOV_B32_e32; + SubIndices = Sub0_15; + } else { - assert(AMDGPU::SReg_32RegClass.contains(DestReg)); - assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + llvm_unreachable("Can't copy register!"); + } + + while (unsigned SubIdx = *SubIndices++) { + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, + get(Opcode), RI.getSubReg(DestReg, SubIdx)); + + Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc)); + + if (*SubIndices) + Builder.addReg(DestReg, RegState::Define | RegState::Implicit); } } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index d6c3f06..2f10c38 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -53,16 +53,6 @@ def SIOperand { int VCC = 0x6A; } -class GPR4Align <RegisterClass rc> : Operand <vAny> { - let EncoderMethod = "GPR4AlignEncode"; - let MIOperandInfo = (ops rc:$reg); -} - -class GPR2Align <RegisterClass rc> : Operand <iPTR> { - let EncoderMethod = "GPR2AlignEncode"; - let MIOperandInfo = (ops rc:$reg); -} - include "SIInstrFormats.td" //===----------------------------------------------------------------------===// @@ -125,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < opName#" $dst, $src0", pattern >; -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass, + RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins GPR2Align<SReg_64>:$sbase, i32imm:$offset), + (ins baseClass:$sbase, i32imm:$offset), asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), - (ins GPR2Align<SReg_64>:$sbase, SReg_32:$soff), + (ins baseClass:$sbase, SReg_32:$soff), asm#" $dst, $sbase, $soff", [] >; } @@ -276,7 +267,7 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU (outs), (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, - GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), + SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []> { @@ -288,7 +279,7 @@ class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF op, (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc, + 
i1imm:$lds, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, " #"$lds, $vaddr, $srsrc, $slc, $tfe, $soffset", @@ -301,7 +292,7 @@ class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF op, (outs regClass:$dst), (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, - i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, + i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", @@ -315,7 +306,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, - GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp), + SReg_256:$srsrc, SReg_128:$ssamp), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", []> { diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index af116f0..05b04a9 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; @@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, 
"S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; } // mayLoad = 1 @@ -840,7 +854,9 @@ defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; let isCommutable = 1 in { @@ -1044,13 +1060,6 @@ def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; let isCodeGenOnly = 1, isPseudo = 1 in { -def SET_M0 : InstSI < - (outs SReg_32:$dst), - (ins i32imm:$src0), - "SET_M0 $dst, $src0", - [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))] ->; - def LOAD_CONST : AMDGPUShaderInst < (outs GPRF32:$dst), (ins i32imm:$src), @@ -1060,13 +1069,6 @@ def LOAD_CONST : AMDGPUShaderInst < let usesCustomInserter = 1 in { -def SI_INTERP : InstSI < - (outs VReg_32:$dst), - (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params), - "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params", - [] ->; - def SI_WQM : InstSI < (outs), (ins), @@ -1147,6 +1149,31 @@ def SI_KILL : InstSI < } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] +let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { + +def SI_INDIRECT_SRC : InstSI < + (outs VReg_32:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off", + [] +>; + +class SI_INDIRECT_DST<RegisterClass rc> : InstSI < + (outs rc:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val), + "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val", + [] +> { + let Constraints = "$src = $dst"; +} + +def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; +def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; +def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; +def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; + +} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1255,22 +1282,83 @@ defm : SamplePatterns<VReg_128, v4i32>; defm : SamplePatterns<VReg_256, v8i32>; defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; +/********** ============================================ **********/ +/********** Extraction, Insertion, Building and Casting **********/ +/********** ============================================ **********/ + +foreach Index = 0-2 in { + def Extract_Element_v2i32_#Index : Extract_Element < + i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2i32_#Index : 
Insert_Element < + i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v2f32_#Index : Extract_Element < + f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2f32_#Index : Insert_Element < + f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-3 in { + def Extract_Element_v4i32_#Index : Extract_Element < + i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4i32_#Index : Insert_Element < + i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; + def Extract_Element_v4f32_#Index : Extract_Element < + f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4f32_#Index : Insert_Element < + f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-7 in { + def Extract_Element_v8i32_#Index : Extract_Element < + i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8i32_#Index : Insert_Element < + i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v8f32_#Index : Extract_Element < + f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8f32_#Index : Insert_Element < + f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-15 in { + def Extract_Element_v16i32_#Index : Extract_Element < + i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16i32_#Index : Insert_Element < + i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v16f32_#Index : Extract_Element < + f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16f32_#Index : Insert_Element < + f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; +} def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; +def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; +def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1305,11 +1393,6 @@ def : Pat < /********** ================== **********/ def : Pat < - (i1 imm:$imm), - (S_MOV_B64 imm:$imm) ->; - -def : Pat < (i32 imm:$imm), (V_MOV_B32_e32 imm:$imm) >; @@ -1320,13 +1403,8 @@ def : Pat < >; def : Pat < - (i32 imm:$imm), - (S_MOV_B32 imm:$imm) ->; - -def : Pat < - (f32 fpimm:$imm), - (S_MOV_B32 fpimm:$imm) + (i1 imm:$imm), + (S_MOV_B64 imm:$imm) >; def : Pat < @@ -1347,58 +1425,16 @@ def : Pat < /********** ===================== **********/ def : Pat < - (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params), - (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, - (S_MOV_B32 SReg_32:$params)) ->; - -def : Pat < - 
(int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) + (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params), + (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params) >; def : Pat < - (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params), - (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan, - imm:$attr, SReg_32:$params) ->; - -def : Pat < - (int_SI_fs_read_face), - (f32 FRONT_FACE) ->; - -def : Pat < - (int_SI_fs_read_pos 0), - (f32 POS_X_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 1), - (f32 POS_Y_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 2), - (f32 POS_Z_FLOAT) ->; - -def : Pat < - (int_SI_fs_read_pos 3), - (f32 POS_W_FLOAT) + (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij), + (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0), + imm:$attr_chan, imm:$attr, M0Reg:$params), + (EXTRACT_SUBREG VReg_64:$ij, sub1), + imm:$attr_chan, imm:$attr, M0Reg:$params) >; /********** ================== **********/ @@ -1455,6 +1491,24 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; +// 1. Offset as 8bit DWORD immediate +def : Pat < + (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) +>; + +// 3. Offset in an 32Bit VGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1489,7 +1543,51 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; -defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; -defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>; +defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; +defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; + +/********** ====================== **********/ +/********** Indirect adressing **********/ +/********** ====================== **********/ + +multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, + SI_INDIRECT_DST IndDst> { + // 1. Extract with offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + >; + + // 2. Extract without offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 VReg_32:$idx))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + >; + + // 3. 
Insert with offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + >; + + // 4. Insert without offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 VReg_32:$idx))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + >; +} + +defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 611b9c4..33bb815 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -16,13 +16,11 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; - def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; def int_SI_wqm : Intrinsic <[], [], []>; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; @@ -30,17 +28,8 @@ let TargetPrefix = "SI", isTarget = 1 in { /* Interpolation Intrinsics */ - def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>; - class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; - - def int_SI_fs_interp_linear_center : Interp; - def int_SI_fs_interp_linear_centroid : Interp; - def int_SI_fs_interp_persp_center : Interp; - def int_SI_fs_interp_persp_centroid : Interp; - def int_SI_fs_interp_constant : Interp; - - def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>; - def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; + def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>; /* Control flow Intrinsics */ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index b215aa2..9a027e7 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -66,6 +66,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; + const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -84,9 +85,14 @@ 
private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void IndirectSrc(MachineInstr &MI); + void IndirectDst(MachineInstr &MI); + public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + MachineFunctionPass(ID), TRI(tm.getRegisterInfo()), + TII(tm.getInstrInfo()) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -302,6 +308,104 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I = MI; + + unsigned Save = MI.getOperand(1).getReg(); + unsigned Idx = MI.getOperand(3).getReg(); + + if (AMDGPU::SReg_32RegClass.contains(Idx)) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + MBB.insert(I, MovRel); + MI.eraseFromParent(); + return; + } + + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); + + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); + + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) + .addReg(Idx); + + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC); + + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); + + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); + + // Do the actual move + MBB.insert(I, MovRel); + + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); + + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vec = MI.getOperand(2).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) + .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Vec, RegState::Implicit); + + LoadM0(MI, MovRel); +} + +void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + unsigned Val = MI.getOperand(5).getReg(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) + .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(Val) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Dst, RegState::Implicit); + + LoadM0(MI, MovRel); +} + bool 
SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; @@ -363,6 +467,17 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::S_BRANCH: Branch(MI); break; + + case AMDGPU::SI_INDIRECT_SRC: + IndirectSrc(MI); + break; + + case AMDGPU::SI_INDIRECT_DST_V2: + case AMDGPU::SI_INDIRECT_DST_V4: + case AMDGPU::SI_INDIRECT_DST_V8: + case AMDGPU::SI_INDIRECT_DST_V16: + IndirectDst(MI); + break; } } } diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index 7e59b42..1a4e4cb 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,11 +10,25 @@ #include "SIMachineFunctionInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" using namespace llvm; +const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType"; + SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : MachineFunctionInfo(), - SPIPSInputAddr(0), - ShaderType(0) - { } + ShaderType(0), + PSInputAddr(0) { + + AttributeSet Set = MF.getFunction()->getAttributes(); + Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, + ShaderTypeAttribute); + + if (A.isStringAttribute()) { + StringRef Str = A.getValueAsString(); + if (Str.getAsInteger(0, ShaderType)) + llvm_unreachable("Can't parse shader type!"); + } +} diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 47271f5..91a809b 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -23,9 +23,11 @@ namespace llvm { /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public MachineFunctionInfo { public: + static const char *ShaderTypeAttribute; + SIMachineFunctionInfo(const MachineFunction &MF); - unsigned SPIPSInputAddr; unsigned ShaderType; + unsigned PSInputAddr; }; } // End namespace llvm diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 9e04e24..4f14931 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -34,32 +34,6 @@ foreach Index = 0-255 in { } } -// virtual Interpolation registers -def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">; -def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">; -def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">; -def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">; -def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">; -def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">; -def PERSP_I_W : SIReg <"PERSP_I_W">; -def PERSP_J_W : SIReg <"PERSP_J_W">; -def PERSP_1_W : SIReg <"PERSP_1_W">; -def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">; -def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">; -def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">; -def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">; -def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">; -def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">; -def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">; -def POS_X_FLOAT : SIReg <"POS_X_FLOAT">; -def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">; -def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">; -def POS_W_FLOAT : SIReg <"POS_W_FLOAT">; -def FRONT_FACE : SIReg <"FRONT_FACE">; -def ANCILLARY : SIReg <"ANCILLARY">; -def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">; -def POS_FIXED_PT : SIReg <"POS_FIXED_PT">; - //===----------------------------------------------------------------------===// // Groupings using register classes and tuples 
//===----------------------------------------------------------------------===// @@ -177,22 +151,22 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SGPR_64, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>; -def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>; +def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; -def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>; +def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; +def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// // [SV]Src_* register classes, can have either an immediate or an register @@ -200,28 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; - -def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, - (add VReg_32, SReg_32, - PERSP_SAMPLE_I, PERSP_SAMPLE_J, - PERSP_CENTER_I, PERSP_CENTER_J, - PERSP_CENTROID_I, PERSP_CENTROID_J, - PERSP_I_W, PERSP_J_W, PERSP_1_W, - LINEAR_SAMPLE_I, LINEAR_SAMPLE_J, - LINEAR_CENTER_I, LINEAR_CENTER_J, - LINEAR_CENTROID_I, LINEAR_CENTROID_J, - LINE_STIPPLE_TEX_COORD, - POS_X_FLOAT, - POS_Y_FLOAT, - POS_Z_FLOAT, - POS_W_FLOAT, - FRONT_FACE, - ANCILLARY, - SAMPLE_COVERAGE, - POS_FIXED_PT - ) ->; +def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; + +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 138b92d..28ac02a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -955,9 +955,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) { // Get the condition flag. 
SDValue CompareFlag; if (LHS.getValueType() == MVT::i32) { - std::vector<EVT> VTs; - VTs.push_back(MVT::i32); - VTs.push_back(MVT::Glue); + EVT VTs[] = { MVT::i32, MVT::Glue }; SDValue Ops[2] = { LHS, RHS }; CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC); @@ -986,9 +984,8 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { SDValue CompareFlag; if (LHS.getValueType() == MVT::i32) { - std::vector<EVT> VTs; - VTs.push_back(LHS.getValueType()); // subcc returns a value - VTs.push_back(MVT::Glue); + // subcc returns a value + EVT VTs[] = { LHS.getValueType(), MVT::Glue }; SDValue Ops[2] = { LHS, RHS }; CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1); Opc = SPISD::SELECT_ICC; diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 357879b..b53a1ed 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index 3a9ace4..ee88ce7 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,6 +24,8 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "_IO_getc", + "_IO_putc", "_ZdaPv", "_ZdlPv", "_Znaj", @@ -38,8 +40,14 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "__cxa_guard_abort", "__cxa_guard_acquire", "__cxa_guard_release", + "__isoc99_scanf", + "__isoc99_sscanf", "__memcpy_chk", + "__strdup", + "__strndup", + "__strtok_r", "abs", + "access", "acos", "acosf", "acosh", @@ -61,6 +69,13 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "atanhf", "atanhl", "atanl", + "atof", + "atoi", + "atol", + "atoll", + "bcmp", + "bcopy", + "bzero", "calloc", "cbrt", "cbrtf", @@ -68,6 +83,10 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "ceil", "ceilf", "ceill", + "chmod", + "chown", + "clearerr", + "closedir", "copysign", "copysignf", "copysignl", @@ -77,6 +96,7 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "coshf", "coshl", "cosl", + "ctermid", "exp", "exp10", "exp10f", @@ -92,25 +112,66 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "fabs", "fabsf", "fabsl", + "fclose", + "fdopen", + "feof", + "ferror", + "fflush", "ffs", "ffsl", "ffsll", + "fgetc", + "fgetpos", + "fgets", + "fileno", "fiprintf", + "flockfile", "floor", "floorf", "floorl", "fmod", "fmodf", "fmodl", + "fopen", + "fopen64", "fprintf", "fputc", "fputs", + "fread", "free", + "frexp", + "frexpf", + "frexpl", + "fscanf", + "fseek", + "fseeko", + "fseeko64", + "fsetpos", + "fstat", + "fstat64", + "fstatvfs", + "fstatvfs64", + "ftell", + "ftello", + "ftello64", + "ftrylockfile", + "funlockfile", "fwrite", + "getc", + "getc_unlocked", + "getchar", + "getenv", + "getitimer", + "getlogin_r", + "getpwnam", + "gets", + "htonl", + "htons", "iprintf", "isascii", "isdigit", "labs", + "lchown", "llabs", "log", "log10", @@ -127,31 +188,64 @@ const char* 
TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "logbl", "logf", "logl", + "lstat", + "lstat64", "malloc", + "memalign", + "memccpy", "memchr", "memcmp", "memcpy", "memmove", + "memrchr", "memset", "memset_pattern16", + "mkdir", + "mktime", + "modf", + "modff", + "modfl", "nearbyint", "nearbyintf", "nearbyintl", + "ntohl", + "ntohs", + "open", + "open64", + "opendir", + "pclose", + "perror", + "popen", "posix_memalign", "pow", "powf", "powl", + "pread", "printf", + "putc", "putchar", "puts", + "pwrite", + "qsort", + "read", + "readlink", "realloc", "reallocf", + "realpath", + "remove", + "rename", + "rewind", "rint", "rintf", "rintl", + "rmdir", "round", "roundf", "roundl", + "scanf", + "setbuf", + "setitimer", + "setvbuf", "sin", "sinf", "sinh", @@ -159,18 +253,28 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "sinhl", "sinl", "siprintf", + "snprintf", "sprintf", "sqrt", "sqrtf", "sqrtl", + "sscanf", + "stat", + "stat64", + "statvfs", + "statvfs64", "stpcpy", + "stpncpy", + "strcasecmp", "strcat", "strchr", "strcmp", + "strcoll", "strcpy", "strcspn", "strdup", "strlen", + "strncasecmp", "strncat", "strncmp", "strncpy", @@ -182,22 +286,43 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "strstr", "strtod", "strtof", + "strtok", + "strtok_r", "strtol", "strtold", "strtoll", "strtoul", "strtoull", + "strxfrm", + "system", "tan", "tanf", "tanh", "tanhf", "tanhl", "tanl", + "times", + "tmpfile", + "tmpfile64", "toascii", "trunc", "truncf", "truncl", - "valloc" + "uname", + "ungetc", + "unlink", + "unsetenv", + "utime", + "utimes", + "valloc", + "vfprintf", + "vfscanf", + "vprintf", + "vscanf", + "vsnprintf", + "vsprintf", + "vsscanf", + "write" }; /// initialize - Initialize the set of available library functions based on the @@ -259,7 +384,9 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::fabsl); TLI.setUnavailable(LibFunc::floorl); TLI.setUnavailable(LibFunc::fmodl); + TLI.setUnavailable(LibFunc::frexpl); TLI.setUnavailable(LibFunc::logl); + TLI.setUnavailable(LibFunc::modfl); TLI.setUnavailable(LibFunc::powl); TLI.setUnavailable(LibFunc::sinl); TLI.setUnavailable(LibFunc::sinhl); @@ -336,16 +463,67 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, TLI.setUnavailable(LibFunc::tanhf); } - // Win32 does *not* provide stpcpy. It is provided on POSIX systems: - // http://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html - TLI.setUnavailable(LibFunc::stpcpy); - - // Win32 does *not* provide ffs. 
It is provided on POSIX systems: - // http://pubs.opengroup.org/onlinepubs/009695399/functions/ffs.html + // Win32 does *not* provide provide these functions, but they are + // generally available on POSIX-compliant systems: + TLI.setUnavailable(LibFunc::access); + TLI.setUnavailable(LibFunc::bcmp); + TLI.setUnavailable(LibFunc::bcopy); + TLI.setUnavailable(LibFunc::bzero); + TLI.setUnavailable(LibFunc::chmod); + TLI.setUnavailable(LibFunc::chown); + TLI.setUnavailable(LibFunc::closedir); + TLI.setUnavailable(LibFunc::ctermid); + TLI.setUnavailable(LibFunc::fdopen); TLI.setUnavailable(LibFunc::ffs); + TLI.setUnavailable(LibFunc::fileno); + TLI.setUnavailable(LibFunc::flockfile); + TLI.setUnavailable(LibFunc::fseeko); + TLI.setUnavailable(LibFunc::fstat); + TLI.setUnavailable(LibFunc::fstatvfs); + TLI.setUnavailable(LibFunc::ftello); + TLI.setUnavailable(LibFunc::ftrylockfile); + TLI.setUnavailable(LibFunc::funlockfile); + TLI.setUnavailable(LibFunc::getc_unlocked); + TLI.setUnavailable(LibFunc::getitimer); + TLI.setUnavailable(LibFunc::getlogin_r); + TLI.setUnavailable(LibFunc::getpwnam); + TLI.setUnavailable(LibFunc::htonl); + TLI.setUnavailable(LibFunc::htons); + TLI.setUnavailable(LibFunc::lchown); + TLI.setUnavailable(LibFunc::lstat); + TLI.setUnavailable(LibFunc::memccpy); + TLI.setUnavailable(LibFunc::mkdir); + TLI.setUnavailable(LibFunc::ntohl); + TLI.setUnavailable(LibFunc::ntohs); + TLI.setUnavailable(LibFunc::open); + TLI.setUnavailable(LibFunc::opendir); + TLI.setUnavailable(LibFunc::pclose); + TLI.setUnavailable(LibFunc::popen); + TLI.setUnavailable(LibFunc::pread); + TLI.setUnavailable(LibFunc::pwrite); + TLI.setUnavailable(LibFunc::read); + TLI.setUnavailable(LibFunc::readlink); + TLI.setUnavailable(LibFunc::realpath); + TLI.setUnavailable(LibFunc::rmdir); + TLI.setUnavailable(LibFunc::setitimer); + TLI.setUnavailable(LibFunc::stat); + TLI.setUnavailable(LibFunc::statvfs); + TLI.setUnavailable(LibFunc::stpcpy); + TLI.setUnavailable(LibFunc::stpncpy); + TLI.setUnavailable(LibFunc::strcasecmp); + TLI.setUnavailable(LibFunc::strncasecmp); + TLI.setUnavailable(LibFunc::times); + TLI.setUnavailable(LibFunc::uname); + TLI.setUnavailable(LibFunc::unlink); + TLI.setUnavailable(LibFunc::unsetenv); + TLI.setUnavailable(LibFunc::utime); + TLI.setUnavailable(LibFunc::utimes); + TLI.setUnavailable(LibFunc::write); - // Win32 does *not* provide llabs. It is defined in ISO/IEC 9899:1999, - // but Visual C++ does not support it. 
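For context, optimizations consult this availability information through TargetLibraryInfo before forming or simplifying calls to the routines listed above. A minimal sketch of such a check — assuming a pass already holds a TargetLibraryInfo pointer named TLI and a callee name FuncName, both placeholders rather than anything from this patch — might look like:

  // Sketch only: guard libcall formation on per-target availability.
  // TLI and FuncName are assumed to exist in the surrounding pass.
  LibFunc::Func F;
  if (TLI->getLibFunc(FuncName, F) && TLI->has(F)) {
    // Treat the call as the standard library routine; on Win32 the
    // setUnavailable() entries added in this hunk make has(F) return false,
    // so the transformation is simply skipped there.
  }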
+ // Win32 does *not* provide provide these functions, but they are + // specified by C99: + TLI.setUnavailable(LibFunc::atoll); + TLI.setUnavailable(LibFunc::frexpf); TLI.setUnavailable(LibFunc::llabs); } @@ -375,6 +553,27 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T, default: TLI.setUnavailable(LibFunc::ffsll); } + + // The following functions are available on at least Linux: + if (T.getOS() != Triple::Linux) { + TLI.setUnavailable(LibFunc::dunder_strdup); + TLI.setUnavailable(LibFunc::dunder_strtok_r); + TLI.setUnavailable(LibFunc::dunder_isoc99_scanf); + TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf); + TLI.setUnavailable(LibFunc::under_IO_getc); + TLI.setUnavailable(LibFunc::under_IO_putc); + TLI.setUnavailable(LibFunc::memalign); + TLI.setUnavailable(LibFunc::fopen64); + TLI.setUnavailable(LibFunc::fseeko64); + TLI.setUnavailable(LibFunc::fstat64); + TLI.setUnavailable(LibFunc::fstatvfs64); + TLI.setUnavailable(LibFunc::ftello64); + TLI.setUnavailable(LibFunc::lstat64); + TLI.setUnavailable(LibFunc::open64); + TLI.setUnavailable(LibFunc::stat64); + TLI.setUnavailable(LibFunc::statvfs64); + TLI.setUnavailable(LibFunc::tmpfile64); + } } @@ -398,11 +597,40 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) CustomNames = TLI.CustomNames; } +namespace { +struct StringComparator { + /// Compare two strings and return true if LHS is lexicographically less than + /// RHS. Requires that RHS doesn't contain any zero bytes. + bool operator()(const char *LHS, StringRef RHS) const { + // Compare prefixes with strncmp. If prefixes match we know that LHS is + // greater or equal to RHS as RHS can't contain any '\0'. + return std::strncmp(LHS, RHS.data(), RHS.size()) < 0; + } + + // Provided for compatibility with MSVC's debug mode. + bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } + bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } +}; +} + bool TargetLibraryInfo::getLibFunc(StringRef funcName, LibFunc::Func &F) const { const char **Start = &StandardNames[0]; const char **End = &StandardNames[LibFunc::NumLibFuncs]; - const char **I = std::lower_bound(Start, End, funcName); + + // Filter out empty names and names containing null bytes, those can't be in + // our table. + if (funcName.empty() || funcName.find('\0') != StringRef::npos) + return false; + + // Check for \01 prefix that is used to mangle __asm declarations and + // strip it if present. + if (funcName.front() == '\01') + funcName = funcName.substr(1); + const char **I = std::lower_bound(Start, End, funcName, StringComparator()); if (I != End && *I == funcName) { F = (LibFunc::Func)(I - Start); return true; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 7d8b49c..e728251 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() { delete AsmInfo; } +/// \brief Reset the target options based on the function's attributes. 
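The resetTargetOptions() definition that follows relies on a RESET_OPTION macro; expanded by hand for a single flag (an illustration only, mirroring the macro body shown below), it reads roughly:

  // Hand-expanded form of RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); sketch only.
  if (F->hasFnAttribute("unsafe-fp-math"))
    TO.UnsafeFPMath =
        (F->getAttributes()
           .getAttribute(AttributeSet::FunctionIndex, "unsafe-fp-math")
           .getValueAsString() == "true");

The same pattern is applied for each of the per-function flags named in the macro invocations that follow.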
+void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { + const Function *F = MF->getFunction(); + TargetOptions &TO = MF->getTarget().Options; + +#define RESET_OPTION(X, Y) \ + do { \ + if (F->hasFnAttribute(Y)) \ + TO.X = \ + (F->getAttributes(). \ + getAttribute(AttributeSet::FunctionIndex, \ + Y).getValueAsString() == "true"); \ + } while (0) + + RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim"); + RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf"); + RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad"); + RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); + RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); + RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(UseSoftFloat, "use-soft-float"); + RESET_OPTION(DisableTailCalls, "disable-tail-calls"); +} + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model TargetMachine::getRelocationModel() const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 1e4c195..79f74bd 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -184,7 +184,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M, } if (TM->addPassesToEmitFile(pass, destf, ft)) { - error = "No DataLayout in TargetMachine"; + error = "TargetMachine can't emit a file of this type"; *ErrorMessage = strdup(error.c_str()); return true; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index b2c6d55..4ed5534a6 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -170,30 +170,35 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + bool NeedAsmRewrite; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + bool NeedSizeDir; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -1734,242 +1739,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - 
TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned 
Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + 
switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } @@ -2080,7 +1917,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Check for the various suffix matches. Tmp[Base.size()] = Suffixes[0]; unsigned ErrorInfoIgnore; - unsigned ErrorInfoMissingFeature; + unsigned ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings. unsigned Match1, Match2, Match3, Match4; Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore, diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index acc90ec..598ddee 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } - // Write an optimal sequence for the first 15 bytes. - const uint64_t OptimalCount = (Count < 16) ? Count : 15; - const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; - for (uint64_t i = 0, e = Prefixes; i != e; i++) - OW->Write8(0x66); - const uint64_t Rest = OptimalCount - Prefixes; - for (uint64_t i = 0, e = Rest; i != e; i++) - OW->Write8(Nops[Rest - 1][i]); - - // Finish with single byte nops. - for (uint64_t i = OptimalCount, e = Count; i != e; ++i) - OW->Write8(0x90); + // 15 is the longest single nop instruction. Emit as many 15-byte nops as + // needed, then emit a nop of the remaining length. + do { + const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15); + const uint8_t Prefixes = ThisNopLength <= 10 ? 
0 : ThisNopLength - 10; + for (uint8_t i = 0; i < Prefixes; i++) + OW->Write8(0x66); + const uint8_t Rest = ThisNopLength - Prefixes; + for (uint8_t i = 0; i < Rest; i++) + OW->Write8(Nops[Rest - 1][i]); + Count -= ThisNopLength; + } while (Count != 0); return true; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204a..5fbefae 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa..2518e02 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B 
= 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270..85155f5 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1526,6 +1526,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index a05cf5c..54cbd40 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1386,7 +1386,6 @@ HasNestArgument(const MachineFunction *MF) { return false; } - /// GetScratchRegister - Get a temp register for performing work in the /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform /// and the properties of the function either one or two registers will be @@ -1612,22 +1611,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { #endif } -// Erlang programs may need a special prologue to handle the stack size they -// might need at runtime. That is because Erlang/OTP does not implement a C -// stack but uses a custom implementation of hybrid stack/heap -// architecture. (for more information see Eric Stenman's Ph.D. thesis: -// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) -// -// -// CheckStack: -// temp0 = sp - MaxStack -// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart -// OldStart: -// ... -// IncStack: -// call inc_stack # doubles the stack space -// temp0 = sp - MaxStack -// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// Erlang programs may need a special prologue to handle the stack size they +/// might need at runtime. That is because Erlang/OTP does not implement a C +/// stack but uses a custom implementation of hybrid stack/heap architecture. +/// (for more information see Eric Stenman's Ph.D. thesis: +/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) +/// +/// CheckStack: +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// OldStart: +/// ... 
+/// IncStack: +/// call inc_stack # doubles the stack space +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1c3b9ae..e6858bc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -85,6 +85,11 @@ static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128) * ElemsPerChunk); + // If the input is a buildvector just emit a smaller one. + if (Vec.getOpcode() == ISD::BUILD_VECTOR) + return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, + Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk); + SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); @@ -181,9 +186,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); - // Bypass i32 with i8 on Atom when compiling with O2 - if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) + // Bypass expensive divides on Atom when compiling with O2 + if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) { addBypassSlowDiv(32, 8); + if (Subtarget->is64Bit()) + addBypassSlowDiv(64, 16); + } if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { // Setup Windows compiler runtime calls. @@ -368,7 +376,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BR_JT , MVT::Other, Expand); setOperationAction(ISD::BRCOND , MVT::Other, Custom); - setOperationAction(ISD::BR_CC , MVT::Other, Expand); + setOperationAction(ISD::BR_CC , MVT::f32, Expand); + setOperationAction(ISD::BR_CC , MVT::f64, Expand); + setOperationAction(ISD::BR_CC , MVT::f80, Expand); + setOperationAction(ISD::BR_CC , MVT::i8, Expand); + setOperationAction(ISD::BR_CC , MVT::i16, Expand); + setOperationAction(ISD::BR_CC , MVT::i32, Expand); + setOperationAction(ISD::BR_CC , MVT::i64, Expand); setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); @@ -4956,7 +4970,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, DAG.getConstant(NumBits, - TLI.getShiftAmountTy(SrcOp.getValueType())))); + TLI.getScalarShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -7820,7 +7834,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7840,18 +7854,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? 
Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -12248,7 +12263,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -12890,13 +12906,16 @@ static unsigned getPseudoCMOVOpc(EVT VT) { // to // // ... -// EAX = LOAD MI.addr +// t1 = LOAD MI.addr // loop: -// t1 = OP MI.val, EAX -// LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] +// t4 = phi(t1, t3 / loop) +// t2 = OP MI.val, t4 +// EAX = t4 +// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined] +// t3 = EAX // JNE loop // sink: -// dst = EAX +// dst = t3 // ... MachineBasicBlock * X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, @@ -12933,7 +12952,11 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, const TargetRegisterClass *RC = MRI.getRegClass(DstReg); MVT::SimpleValueType VT = *RC->vt_begin(); - unsigned AccPhyReg = getX86SubSuperRegister(X86::EAX, VT); + unsigned t1 = MRI.createVirtualRegister(RC); + unsigned t2 = MRI.createVirtualRegister(RC); + unsigned t3 = MRI.createVirtualRegister(RC); + unsigned t4 = MRI.createVirtualRegister(RC); + unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT); unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); unsigned LOADOpc = getLoadOpcode(VT); @@ -12941,12 +12964,16 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, // For the atomic load-arith operator, we generate // // thisMBB: - // EAX = LOAD [MI.addr] + // t1 = LOAD [MI.addr] // mainMBB: + // t4 = phi(t1 / thisMBB, t3 / mainMBB) // t1 = OP MI.val, EAX + // EAX = t4 // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] + // t3 = EAX // JNE mainMBB // sinkMBB: + // dst = t3 MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); @@ -12962,23 +12989,34 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // thisMBB: - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), AccPhyReg); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { + unsigned flags = (*MMOI)->getFlags(); + flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, + (*MMOI)->getSize(), + (*MMOI)->getBaseAlignment(), + (*MMOI)->getTBAAInfo(), + (*MMOI)->getRanges()); + MIB.addMemOperand(MMO); + } 
thisMBB->addSuccessor(mainMBB); // mainMBB: MachineBasicBlock *origMainMBB = mainMBB; - mainMBB->addLiveIn(AccPhyReg); - // Copy AccPhyReg as it is used more than once. - unsigned AccReg = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccReg) - .addReg(AccPhyReg); + // Add a PHI. + MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - unsigned t1 = MRI.createVirtualRegister(RC); unsigned Opc = MI->getOpcode(); switch (Opc) { default: @@ -12996,20 +13034,20 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, case X86::ATOMXOR32: case X86::ATOMXOR64: { unsigned ARITHOpc = getNonAtomicOpcode(Opc); - BuildMI(mainMBB, DL, TII->get(ARITHOpc), t1).addReg(SrcReg) - .addReg(AccReg); + BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg) + .addReg(t4); break; } case X86::ATOMNAND8: case X86::ATOMNAND16: case X86::ATOMNAND32: case X86::ATOMNAND64: { - unsigned t2 = MRI.createVirtualRegister(RC); + unsigned Tmp = MRI.createVirtualRegister(RC); unsigned NOTOpc; unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); - BuildMI(mainMBB, DL, TII->get(ANDOpc), t2).addReg(SrcReg) - .addReg(AccReg); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1).addReg(t2); + BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg) + .addReg(t4); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp); break; } case X86::ATOMMAX8: @@ -13033,20 +13071,22 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, BuildMI(mainMBB, DL, TII->get(CMPOpc)) .addReg(SrcReg) - .addReg(AccReg); + .addReg(t4); if (Subtarget->hasCMov()) { if (VT != MVT::i8) { // Native support - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t1) + BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) .addReg(SrcReg) - .addReg(AccReg); + .addReg(t4); } else { // Promote i8 to i32 to use CMOV32 - const TargetRegisterClass *RC32 = getRegClassFor(MVT::i32); + const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterClass *RC32 = + TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit); unsigned SrcReg32 = MRI.createVirtualRegister(RC32); unsigned AccReg32 = MRI.createVirtualRegister(RC32); - unsigned t2 = MRI.createVirtualRegister(RC32); + unsigned Tmp = MRI.createVirtualRegister(RC32); unsigned Undef = MRI.createVirtualRegister(RC32); BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef); @@ -13057,15 +13097,15 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, .addImm(X86::sub_8bit); BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32) .addReg(Undef) - .addReg(AccReg) + .addReg(t4) .addImm(X86::sub_8bit); - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) + BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp) .addReg(SrcReg32) .addReg(AccReg32); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t1) - .addReg(t2, 0, X86::sub_8bit); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2) + .addReg(Tmp, 0, X86::sub_8bit); } } else { // Use pseudo select and lower them. 
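Note on the i8 min/max path above: there is no 8-bit CMOV, so the selected code widens both operands into a 32-bit register class that has a sub_8bit subregister, does the 32-bit conditional move, and copies the low byte back out. A minimal C++ analogue of that widening (illustrative only; the real code builds MachineInstrs on virtual registers, and the comparison condition depends on the specific ATOM* opcode):

#include <cstdint>

// Sketch of the i8 -> i32 promotion used because CMOV has no 8-bit form.
static uint8_t atomicMax8Step(uint8_t Cur, uint8_t Src) {
  uint32_t Cur32 = Cur;                          // INSERT_SUBREG ..., sub_8bit
  uint32_t Src32 = Src;                          // INSERT_SUBREG ..., sub_8bit
  uint32_t Res32 = (Src > Cur) ? Src32 : Cur32;  // CMP + CMOV32
  return static_cast<uint8_t>(Res32);            // COPY of sub_8bit back to GR8
}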
@@ -13074,36 +13114,47 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, unsigned SelOpc = getPseudoCMOVOpc(VT); X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); - MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t1) - .addReg(SrcReg).addReg(AccReg) + MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2) + .addReg(SrcReg).addReg(t4) .addImm(CC); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI node as mainMBB is changed after CMOV + // lowering. + BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4) + .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); + Phi->eraseFromParent(); } break; } } - // Copy AccPhyReg back from virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), AccPhyReg) - .addReg(AccReg); + // Copy PhyReg back from virtual register. + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg) + .addReg(t4); MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.addReg(t1); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + MIB.addReg(t2); MIB.setMemRefs(MMOBegin, MMOEnd); + // Copy PhyReg back to virtual register. + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3) + .addReg(PhyReg); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); mainMBB->addSuccessor(origMainMBB); mainMBB->addSuccessor(sinkMBB); // sinkMBB: - sinkMBB->addLiveIn(AccPhyReg); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) - .addReg(AccPhyReg); + .addReg(t3); MI->eraseFromParent(); return sinkMBB; @@ -13120,15 +13171,24 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, // to // // ... -// EAX = LOAD [MI.addr + 0] -// EDX = LOAD [MI.addr + 4] +// t1L = LOAD [MI.addr + 0] +// t1H = LOAD [MI.addr + 4] // loop: -// EBX = OP MI.val.lo, EAX -// ECX = OP MI.val.hi, EDX +// t4L = phi(t1L, t3L / loop) +// t4H = phi(t1H, t3H / loop) +// t2L = OP MI.val.lo, t4L +// t2H = OP MI.val.hi, t4H +// EAX = t4L +// EDX = t4H +// EBX = t2L +// ECX = t2H // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] +// t3L = EAX +// t3H = EDX // JNE loop // sink: -// dst = EDX:EAX +// dstL = t3L +// dstH = t3H // ... 
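The pseudo-code above is the familiar load / operate / compare-and-swap retry loop. A self-contained C++ sketch of the same shape for a 32-bit atomic add (an analogue of the ATOMADD32 case, not the MachineInstr-level lowering itself):

#include <atomic>
#include <cstdint>

// Returns the value that was in memory before the update, matching "dst = t3".
static uint32_t atomicAdd32(std::atomic<uint32_t> &Mem, uint32_t Val) {
  uint32_t Old = Mem.load();                        // t1 = LOAD [MI.addr]
  uint32_t New;
  do {                                              // mainMBB: t4 = phi(t1, t3)
    New = Old + Val;                                // t2 = OP MI.val, t4
  } while (!Mem.compare_exchange_weak(Old, New));   // LCMPXCHG; on failure Old is reloaded (t3)
  return Old;                                       // sinkMBB: old memory value
}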
MachineBasicBlock * X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, @@ -13169,20 +13229,37 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, const TargetRegisterClass *RC = &X86::GR32RegClass; const TargetRegisterClass *RC8 = &X86::GR8RegClass; + unsigned t1L = MRI.createVirtualRegister(RC); + unsigned t1H = MRI.createVirtualRegister(RC); + unsigned t2L = MRI.createVirtualRegister(RC); + unsigned t2H = MRI.createVirtualRegister(RC); + unsigned t3L = MRI.createVirtualRegister(RC); + unsigned t3H = MRI.createVirtualRegister(RC); + unsigned t4L = MRI.createVirtualRegister(RC); + unsigned t4H = MRI.createVirtualRegister(RC); + unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; unsigned LOADOpc = X86::MOV32rm; // For the atomic load-arith operator, we generate // // thisMBB: - // EAX = LOAD [MI.addr + 0] - // EDX = LOAD [MI.addr + 4] + // t1L = LOAD [MI.addr + 0] + // t1H = LOAD [MI.addr + 4] // mainMBB: - // EBX = OP MI.vallo, EAX - // ECX = OP MI.valhi, EDX + // t4L = phi(t1L / thisMBB, t3L / mainMBB) + // t4H = phi(t1H / thisMBB, t3H / mainMBB) + // t2L = OP MI.val.lo, t4L + // t2H = OP MI.val.hi, t4H + // EBX = t2L + // ECX = t2H // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] - // JNE mainMBB + // t3L = EAX + // t3H = EDX + // JNE loop // sinkMBB: + // dstL = t3L + // dstH = t3H MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); @@ -13199,35 +13276,50 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, // thisMBB: // Lo - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EAX); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } + for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { + unsigned flags = (*MMOI)->getFlags(); + flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; + MachineMemOperand *MMO = + MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, + (*MMOI)->getSize(), + (*MMOI)->getBaseAlignment(), + (*MMOI)->getTBAAInfo(), + (*MMOI)->getRanges()); + MIB.addMemOperand(MMO); + }; + MachineInstr *LowMI = MIB; + // Hi - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), X86::EDX); + MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) + if (i == X86::AddrDisp) { MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) - else - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + } else { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } } - MIB.setMemRefs(MMOBegin, MMOEnd); + MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end()); thisMBB->addSuccessor(mainMBB); // mainMBB: MachineBasicBlock *origMainMBB = mainMBB; - mainMBB->addLiveIn(X86::EAX); - mainMBB->addLiveIn(X86::EDX); - - // Copy EDX:EAX as they are used more than once. 
- unsigned LoReg = MRI.createVirtualRegister(RC); - unsigned HiReg = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), LoReg).addReg(X86::EAX); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), HiReg).addReg(X86::EDX); - unsigned t1L = MRI.createVirtualRegister(RC); - unsigned t1H = MRI.createVirtualRegister(RC); + // Add PHIs. + MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); unsigned Opc = MI->getOpcode(); switch (Opc) { @@ -13240,19 +13332,23 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, case X86::ATOMSUB6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(LoReg).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(HiReg).addReg(SrcHiReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L) + .addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H) + .addReg(SrcHiReg); break; } case X86::ATOMNAND6432: { unsigned HiOpc, NOTOpc; unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); - unsigned t2L = MRI.createVirtualRegister(RC); - unsigned t2H = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg).addReg(HiReg); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1L).addReg(t2L); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t1H).addReg(t2H); + unsigned TmpL = MRI.createVirtualRegister(RC); + unsigned TmpH = MRI.createVirtualRegister(RC); + BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg) + .addReg(t4L); + BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg) + .addReg(t4H); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL); + BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH); break; } case X86::ATOMMAX6432: @@ -13268,12 +13364,12 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, unsigned cc = MRI.createVirtualRegister(RC); // cl := cmp src_lo, lo BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcLoReg).addReg(LoReg); + .addReg(SrcLoReg).addReg(t4L); BuildMI(mainMBB, DL, TII->get(LoOpc), cL); BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); // ch := cmp src_hi, hi BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcHiReg).addReg(HiReg); + .addReg(SrcHiReg).addReg(t4H); BuildMI(mainMBB, DL, TII->get(HiOpc), cH); BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); // cc := if (src_hi == hi) ? 
cl : ch; @@ -13288,58 +13384,74 @@ X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, } BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1L) - .addReg(SrcLoReg).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t1H) - .addReg(SrcHiReg).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L) + .addReg(SrcLoReg).addReg(t4L); + BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H) + .addReg(SrcHiReg).addReg(t4H); } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1L) - .addReg(SrcLoReg).addReg(LoReg) + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L) + .addReg(SrcLoReg).addReg(t4L) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t1H) - .addReg(SrcHiReg).addReg(HiReg) + // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the + // 2nd CMOV lowering. + mainMBB->addLiveIn(X86::EFLAGS); + MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) + .addReg(SrcHiReg).addReg(t4H) .addImm(X86::COND_NE); mainMBB = EmitLoweredSelect(MIB, mainMBB); + // Replace the original PHI node as mainMBB is changed after CMOV + // lowering. + BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L) + .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); + BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H) + .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); + PhiL->eraseFromParent(); + PhiH->eraseFromParent(); } break; } case X86::ATOMSWAP6432: { unsigned HiOpc; unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t1L).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t1H).addReg(SrcHiReg); + BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg); + BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg); break; } } // Copy EDX:EAX back from HiReg:LoReg - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(LoReg); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(HiReg); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H); // Copy ECX:EBX from t1H:t1L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t1L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t1H); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H); MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) - MIB.addOperand(MI->getOperand(MemOpndSlot + i)); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); + if (NewMO.isReg()) + NewMO.setIsKill(false); + MIB.addOperand(NewMO); + } MIB.setMemRefs(MMOBegin, MMOEnd); + // Copy EDX:EAX back to t3H:t3L + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX); + BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX); + BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); mainMBB->addSuccessor(origMainMBB); mainMBB->addSuccessor(sinkMBB); // sinkMBB: - sinkMBB->addLiveIn(X86::EAX); - sinkMBB->addLiveIn(X86::EDX); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(TargetOpcode::COPY), DstLoReg) - .addReg(X86::EAX); + .addReg(t3L); BuildMI(*sinkMBB, sinkMBB->begin(), DL, 
TII->get(TargetOpcode::COPY), DstHiReg) - .addReg(X86::EDX); + .addReg(t3H); MI->eraseFromParent(); return sinkMBB; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 958ceb0..da1dad0 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -471,7 +471,7 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; } virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a406..f406416 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. 
[(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - 
"imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! 
let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. 
+let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. 
let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. 
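The BinOp* classes in this file attach one of three scheduling patterns depending on where the memory access sits: pure register ALU ops get WriteALU, ops that fold a load get WriteALULd with the register use marked ReadAfterLd, and memory read-modify-write forms add WriteRMW for the store. As a rough illustration (assumed mapping; the exact instructions chosen depend on the compiler and optimization level), the three shapes correspond to:

// Illustrative C++ showing the three ALU shapes the Sched<> annotations
// distinguish; comments give the x86 form a compiler would typically pick.
int aluForms(int *P, int A, int B) {
  int R = A + B;   // add r32, r32    -> Sched<[WriteALU]>
  R += *P;         // add r32, [mem]  -> Sched<[WriteALULd, ReadAfterLd]>
  *P += A;         // add [mem], r32  -> Sched<[WriteALULd, WriteRMW]>
  return R;
}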
@@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". 
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -1210,11 +1261,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752..105963f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -45,6 +47,7 @@ def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +55,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,6 +69,7 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S @@ -82,6 +88,7 @@ def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +96,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +110,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +120,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +128,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +161,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, 
!if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -189,14 +204,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +226,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, IIC_DEFAULT, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, IIC_DEFAULT, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -444,7 +463,7 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in @@ -464,7 +483,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +492,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. 
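The note about loads zeroing the upper bits is the usual scalar-move semantics: movss/movsd from memory clear the untouched lanes, while the register-to-register forms merge the scalar into an existing vector. A small C++ sketch of those semantics (illustrative, not the selection patterns themselves):

struct V4f { float Lane[4]; };

// movss from memory: scalar goes to lane 0, the remaining lanes become zero.
static V4f movssLoad(const float *P) {
  V4f R = {{0.0f, 0.0f, 0.0f, 0.0f}};
  R.Lane[0] = *P;
  return R;
}

// movss reg, reg: lane 0 comes from the source, lanes 1-3 keep the destination.
static V4f movssMerge(V4f Dst, V4f Src) {
  Dst.Lane[0] = Src.Lane[0];
  return Dst;
}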
@@ -482,11 +502,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +765,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +812,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,6 +845,7 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -880,6 +904,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,6 +921,7 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -1009,7 +1035,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1095,14 +1121,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), 
!strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1151,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1172,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1252,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1277,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1328,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1343,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1384,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1414,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, 
Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1425,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1534,10 +1571,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1588,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -2193,12 +2232,13 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { @@ -2241,12 +2281,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). 
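For orientation: the Sched<[itins.Sched]> and Sched<[itins.Sched.Folded]> references in these multiclasses, together with the "let Sched = WriteCvtI2F in" wrappers around the SSE_CVT_* bundles, assume that OpndItins now carries a default SchedWrite. The class change itself is not visible in the hunks shown here; the following is only a sketch of what the extended helper presumably looks like, with X86FoldableSchedWrite taken from the X86Schedule.td hunk further down and the WriteFAdd default guessed for illustration:

    // Sketch only; the real definition lives outside the hunks shown here.
    class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
      InstrItinClass rr = arg_rr;
      InstrItinClass rm = arg_rm;
      // Default SchedWrite; instruction groups override it via 'let Sched = ...'.
      X86FoldableSchedWrite Sched = WriteFAdd;
    }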
@@ -2276,12 +2318,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,11 +2382,13 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { @@ -2427,12 +2473,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2564,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2662,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2693,7 +2743,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; 
def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2752,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3019,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3027,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3037,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3047,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3074,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. 
@@ -3033,24 +3093,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3120,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3142,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode 
(memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3179,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3216,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. 
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3260,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. 
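To see how the pieces compose, here is a rough hand expansion of the packed-single square-root pair once SSE_SQRTP is wrapped in "let Sched = WriteFSqrt": the register form picks up the plain SchedWrite and the memory form its folded-load variant. The record names follow the existing SQRT defm convention, but the real records are generated by TableGen from sse1_fp_unop_p, so treat this purely as an illustration:

    // Illustrative expansion (non-AVX forms of the SQRT group).
    def SQRTPSr : PSI<0x51, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "sqrtps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (v4f32 (fsqrt VR128:$src)))],
                      IIC_SSE_SQRTP_RR>, Sched<[WriteFSqrt]>;
    def SQRTPSm : PSI<0x51, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "sqrtps\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (fsqrt (memopv4f32 addr:$src)))],
                      IIC_SSE_SQRTP_RM>, Sched<[WriteFSqrtLd]>;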
@@ -3305,52 +3376,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3433,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, 
VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,7 +3444,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence @@ -3450,7 +3521,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3537,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3555,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3572,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3591,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3610,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3625,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3653,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer 
Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3672,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3714,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3744,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3858,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. 
def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3904,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3950,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4045,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4063,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4081,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4125,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4134,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4143,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, 
(outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4227,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4237,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4247,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4262,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4274,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4302,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4341,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], 
IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4384,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4407,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, 
(outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4442,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4451,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index ad55058..a37a8cc 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -22,7 +22,7 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins), let isBranch = 1, isTerminator = 1, Defs = [EAX] in def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst), - "xbegin\t$dst", []>; + "xbegin\t$dst", []>, Requires<[HasRTM]>; def XEND : I<0x01, MRM_D5, (outs), (ins), "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e..a8a9fd8 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. 
+ case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085..da0ca7d 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,6 +7,78 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. 
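These SchedWrite and SchedRead definitions are target independent; they only acquire latencies and port assignments once a per-CPU SchedMachineModel binds them, and no such binding is part of this excerpt. A minimal hypothetical sketch, with the model name, processor resources and cycle counts all invented, of how a CPU model would be expected to consume WriteALU, its folded-load twin and ReadAfterLd:

    // Hypothetical CPU model, for illustration only.
    def ExampleModel : SchedMachineModel {
      let IssueWidth = 4;
    }
    let SchedModel = ExampleModel in {
      def ExALUPort : ProcResource<2>;   // two integer ALU pipes
      // Register-register ALU ops: one cycle on an ALU pipe.
      def : WriteRes<WriteALU, [ExALUPort]> { let Latency = 1; }
      // Folded-load variant: same pipe, plus the load latency.
      def : WriteRes<WriteALULd, [ExALUPort]> { let Latency = 5; }
      // Operands tagged ReadAfterLd are not needed until the folded load
      // completes, so they may be read several cycles after issue.
      def : ReadAdvance<ReadAfterLd, 4>;
    }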
+def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 def IIC_DEFAULT : InstrItinClass; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index fefb479..be2a997 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -176,18 +176,42 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { { ISD::MUL, MVT::v8i32, 4 }, { ISD::SUB, MVT::v8i32, 4 }, { ISD::ADD, MVT::v8i32, 4 }, - { ISD::MUL, MVT::v4i64, 4 }, { ISD::SUB, MVT::v4i64, 4 }, { ISD::ADD, MVT::v4i64, 4 }, - }; + // A v4i64 multiply is custom lowered as two split v2i64 vectors that then + // are lowered as a series of long multiplies(3), shifts(4) and adds(2) + // Because we believe v4i64 to be a legal type, we must also include the + // split factor of two in the cost table. Therefore, the cost here is 18 + // instead of 9. + { ISD::MUL, MVT::v4i64, 18 }, + }; // Look for AVX1 lowering tricks. - if (ST->hasAVX()) { - int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), ISD, - LT.second); + if (ST->hasAVX() && !ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), + ISD, LT.second); if (Idx != -1) return LT.first * AVX1CostTable[Idx].Cost; } + + // Custom lowering of vectors. + static const CostTblEntry<MVT> CustomLowered[] = { + // A v2i64/v4i64 and multiply is custom lowered as a series of long + // multiplies(3), shifts(4) and adds(2). + { ISD::MUL, MVT::v2i64, 9 }, + { ISD::MUL, MVT::v4i64, 9 }, + }; + int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered), + ISD, LT.second); + if (Idx != -1) + return LT.first * CustomLowered[Idx].Cost; + + // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, + // 2x pmuludq, 2x shuffle. 
+ if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && + !ST->hasSSE41()) + return 6; + // Fallback to the default implementation. return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index f8a9125..a5d2be8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -84,7 +84,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? // XCore does not have the NodeTypes below. - setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::ADDC, MVT::i32, Expand); setOperationAction(ISD::ADDE, MVT::i32, Expand); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 6d430ef..8d258f5 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -84,7 +84,7 @@ namespace llvm { explicit XCoreTargetLowering(XCoreTargetMachine &TM); virtual unsigned getJumpTableEncoding() const; - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 8a6bfc6..fa3d72d 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -60,7 +60,7 @@ namespace { continue; } - bool Local = I->hasLocalLinkage(); + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -80,7 +80,7 @@ namespace { continue; } - bool Local = I->hasLocalLinkage(); + bool Local = I->isDiscardableIfUnused(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); @@ -97,7 +97,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - if (CurI->hasLocalLinkage()) { + if (CurI->isDiscardableIfUnused()) { CurI->setVisibility(GlobalValue::HiddenVisibility); CurI->setLinkage(GlobalValue::ExternalLinkage); } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 47b2b51..027a9f2 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -391,9 +391,9 @@ LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM, - bool Internalize, - bool RunInliner) { + LLVMBool Internalize, + LLVMBool RunInliner) { PassManagerBuilder *Builder = unwrap(PMB); PassManagerBase *LPM = unwrap(PM); - Builder->populateLTOPassManager(*LPM, Internalize, RunInliner); + Builder->populateLTOPassManager(*LPM, Internalize != 0, RunInliner != 0); } diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6..3c5781c 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -150,7 +150,9 @@ namespace { typedef SmallVector<const FAddend*, 4> AddendVect; Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value 
*createAddendVal(const FAddend &A, bool& NeedNeg); @@ -159,6 +161,7 @@ namespace { Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); @@ -388,6 +391,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? 
+ createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -471,7 +546,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -627,7 +703,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +716,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4332467..990cbc3 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -22,8 +22,8 @@ using namespace PatternMatch; /// AddOne - Add one to a ConstantInt. -static Constant *AddOne(Constant *C) { - return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); +static Constant *AddOne(ConstantInt *C) { + return ConstantInt::get(C->getContext(), C->getValue() + 1); } /// SubOne - Subtract one from a ConstantInt. static Constant *SubOne(ConstantInt *C) { diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index a960ab2..d162223 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -104,6 +104,12 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); if (CastElTySize == 0 || AllocElTySize == 0) return 0; + // If the allocation has multiple uses, only promote it if we're not + // shrinking the amount of memory being allocated. + uint64_t AllocElTyStoreSize = TD->getTypeStoreSize(AllocElTy); + uint64_t CastElTyStoreSize = TD->getTypeStoreSize(CastElTy); + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + // See if we can satisfy the modulus by pulling a scale out of the array // size argument. 
unsigned ArraySizeScale; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bad46b4..32fdb9b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1333,13 +1333,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 8e4267f..173f2bf 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -402,7 +402,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - // (MDC +/- C1) * C2 => (MDC * C2) +/- (C1 * C2) + // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C) Instruction *FAddSub = dyn_cast<Instruction>(Op0); if (FAddSub && (FAddSub->getOpcode() == Instruction::FAdd || @@ -420,8 +420,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { if (C1 && C1->getValueAPF().isNormal() && isFMulOrFDivWithConstant(Opnd0)) { - Value *M0 = ConstantExpr::getFMul(C1, C); - Value *M1 = isNormalFp(cast<ConstantFP>(M0)) ? + Value *M1 = ConstantExpr::getFMul(C1, C); + Value *M0 = isNormalFp(cast<ConstantFP>(M1)) ? foldFMulConst(cast<Instruction>(Opnd0), C, &I) : 0; if (M0 && M1) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475..92b42ee 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v2"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. 
struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -315,7 +315,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -531,9 +531,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -885,11 +888,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -901,6 +905,9 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // this TU. Used in initialization order checking. Value *FirstDynamic = 0, *LastDynamic = 0; + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; GlobalVariable *G = GlobalsToChange[i]; @@ -930,11 +937,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. 
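The descriptor comment block and GlobalStructTy above define the per-global record that the pass packs into an array and passes to the run-time; this patch grows it from five to six pointer-sized fields by adding module_name, so the per-global name string no longer has to embed the module identifier. A hedged sketch of the runtime-side layout follows: the leading field is cut off by the hunk header and is assumed to be the global's address, and the two strings travel as pointer-sized integers via getPointerCast.

#include <cstdint>

// Assumed mirror of GlobalStructTy's six IntptrTy slots. Field names follow
// the comments in the hunk; "beg" is an assumption, not shown in the diff.
struct asan_global_descriptor {
  uintptr_t beg;                // assumed: address of the instrumented global
  uintptr_t size;               // size of the original global
  uintptr_t size_with_redzone;  // size including the right redzone
  uintptr_t name;               // const char *: now just the global's name
  uintptr_t module_name;        // const char *: new, shared per-module string
  uintptr_t has_dynamic_init;   // nonzero if checked for init order
};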
GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,6 +961,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); @@ -1095,6 +1099,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index eb0dc1e..e8d4ac8 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" #include "llvm/Support/InstIterator.h" @@ -39,31 +40,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false), - UseExtraChecksum(false), NoRedZone(false) { + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format, - bool useExtraChecksum, bool NoRedZone_) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - Use402Format(use402Format), UseExtraChecksum(useExtraChecksum), - NoRedZone(NoRedZone_) { - assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. 
+ void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -74,6 +101,7 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -84,22 +112,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - bool Use402Format; - bool UseExtraChecksum; - bool NoRedZone; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -110,12 +138,8 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - bool Use402Format, - bool UseExtraChecksum, - bool NoRedZone) { - return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum, - NoRedZone); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); } namespace { @@ -253,8 +277,8 @@ namespace { // object users can construct, the blocks and lines will be rooted here. 
class GCOVFunction : public GCOVRecord { public: - GCOVFunction(DISubprogram SP, raw_ostream *os, - bool Use402Format, bool UseExtraChecksum) { + GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -268,13 +292,12 @@ namespace { writeBytes(FunctionTag, 4); uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); - uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum writeGCOVString(SP.getName()); writeGCOVString(SP.getFilename()); @@ -358,12 +381,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -376,10 +399,9 @@ void GCOVProfiler::emitGCNO() { std::string ErrorInfo; raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); - if (!Use402Format) - out.write("oncg*404MVLL", 12); - else - out.write("oncg*204MVLL", 12); + out.write("oncg", 4); + out.write(ReversedVersion, 4); + out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { @@ -388,7 +410,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -469,21 +491,18 @@ bool GCOVProfiler::emitProfileArcs() { Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { - Value *Sel = Builder.CreateSelect( - BI->getCondition(), - ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), - ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + Value *Sel = Builder.CreateSelect(BI->getCondition(), + Builder.getInt64(Edge), + Builder.getInt64(Edge + 1)); SmallVector<Value *, 2> Idx; - Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Builder.getInt64(0)); Idx.push_back(Sel); Value *Counter = Builder.CreateInBoundsGEP(Counters, Idx); Value *Count = Builder.CreateLoad(Counter); - Count = Builder.CreateAdd(Count, - ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { ComplexEdgePreds.insert(BB); @@ -500,10 +519,9 @@ bool GCOVProfiler::emitProfileArcs() { ComplexEdgePreds, ComplexEdgeSuccs); GlobalVariable *EdgeState = getEdgeStateValue(); - Type *Int32Ty = Type::getInt32Ty(*Ctx); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); - Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + Builder.CreateStore(Builder.getInt32(i), 
EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // call runtime to perform increment @@ -522,8 +540,42 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Builder.getInt32Ty(), + PointerType::get(FTy, 0), false); + Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); + Builder.CreateCall(AtExitFn, WriteoutF); + + // Register the local flush function. + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + FTy = FunctionType::get(Builder.getVoidTy(), + PointerType::get(FTy, 0), false); + Constant *RegFlush = + M->getOrInsertFunction("llvm_register_flush_function", FTy); + Builder.CreateCall(RegFlush, FlushF); + + // Make sure that all the flush function list is deleted. + Builder.CreateCall(AtExitFn, getDeleteFlushFunctionListFunc()); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -560,8 +612,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( if (Successors > 1 && !isa<BranchInst>(TI) && !isa<ReturnInst>(TI)) { for (int i = 0; i != Successors; ++i) { BasicBlock *Succ = TI->getSuccessor(i); - IRBuilder<> builder(Succ); - Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + IRBuilder<> Builder(Succ); + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + (Preds.idFor(BB)-1)] = cast<Constant>(Counter); @@ -581,8 +633,11 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( } Constant *GCOVProfiler::getStartFileFunc() { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Type::getInt8PtrTy(*Ctx), false); + Type *Args[] = { + Type::getInt8PtrTy(*Ctx), // const char *orig_filename + Type::getInt8PtrTy(*Ctx), // const char version[4] + }; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_start_file", FTy); } @@ -598,9 +653,10 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { } Constant *GCOVProfiler::getEmitFunctionFunc() { - Type *Args[2] = { + Type *Args[3] = { Type::getInt32Ty(*Ctx), // uint32_t ident Type::getInt8PtrTy(*Ctx), // const char *function_name + Type::getInt8Ty(*Ctx), // uint8_t use_extra_checksum }; FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); @@ -611,11 +667,15 @@ Constant *GCOVProfiler::getEmitArcsFunc() { Type::getInt32Ty(*Ctx), // uint32_t num_counters Type::getInt64PtrTy(*Ctx), // uint64_t *counters }; - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), - Args, false); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), Args, false); return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } 
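The FunctionType construction in this hunk fixes the C-level interface that instrumented code now expects from the profiling runtime: llvm_gcda_start_file gains a four-byte version argument, llvm_gcda_emit_function gains a use_extra_checksum flag, and two new registration hooks back the __llvm_gcov_init constructor emitted above. A minimal sketch of those entry points, inferred from the getOrInsertFunction calls in this patch; the real definitions live in the runtime, outside this diff.

#include <cstdint>

// Signatures inferred from the FunctionTypes above; this sketches what the
// runtime must provide, it is not code from the patch itself.
extern "C" {
void llvm_gcda_start_file(const char *orig_filename, const char *version);
void llvm_gcda_emit_function(uint32_t ident, const char *function_name,
                             uint8_t use_extra_checksum);
void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters);
void llvm_gcda_end_file(void);
void llvm_register_flush_function(void (*flush)(void)); // gets __llvm_gcov_flush
void llvm_delete_flush_function_list(void);              // run via atexit
}

Note that version is the ReversedVersion string derived from GCOVOptions::Version, ident is now the subprogram's index rather than a pointer cast, and function_name may be null when FunctionNamesInData is off.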
+Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -634,7 +694,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -643,7 +703,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); @@ -659,50 +719,31 @@ void GCOVProfiler::insertCounterWriteout( for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); std::string FilenameGcda = mangleName(CU, "gcda"); - Builder.CreateCall(StartFile, - Builder.CreateGlobalStringPtr(FilenameGcda)); - for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator - I = CountersBySP.begin(), E = CountersBySP.end(); - I != E; ++I) { - DISubprogram SP(I->second); - intptr_t ident = reinterpret_cast<intptr_t>(I->second); - Builder.CreateCall2(EmitFunction, - ConstantInt::get(Type::getInt32Ty(*Ctx), ident), - Builder.CreateGlobalStringPtr(SP.getName())); - - GlobalVariable *GV = I->first; + Builder.CreateCall2(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda), + Builder.CreateGlobalStringPtr(ReversedVersion)); + for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { + DISubprogram SP(CountersBySP[j].second); + Builder.CreateCall3(EmitFunction, + Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(SP.getName()) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); + + GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = cast<ArrayType>(GV->getType()->getElementType())->getNumElements(); Builder.CreateCall2(EmitArcs, - ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.getInt32(Arcs), Builder.CreateConstGEP2_64(GV, 0, 0)); } Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. 
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Type::getInt32Ty(*Ctx), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -711,13 +752,9 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); - Type *Int32Ty = Type::getInt32Ty(*Ctx); - Type *Int64Ty = Type::getInt64Ty(*Ctx); - Constant *NegOne = ConstantInt::get(Int32Ty, 0xffffffff); - // Create basic blocks for function. BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", Fn); IRBuilder<> Builder(BB); @@ -731,26 +768,27 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Argument *Arg = Fn->arg_begin(); Arg->setName("predecessor"); Value *Pred = Builder.CreateLoad(Arg, "pred"); - Value *Cond = Builder.CreateICmpEQ(Pred, NegOne); + Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff)); BranchInst::Create(Exit, PredNotNegOne, Cond, BB); Builder.SetInsertPoint(PredNotNegOne); // uint64_t *counter = counters[pred]; // if (!counter) return; - Value *ZExtPred = Builder.CreateZExt(Pred, Int64Ty); + Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); Arg = llvm::next(Fn->arg_begin()); Arg->setName("counters"); Value *GEP = Builder.CreateGEP(Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); Cond = Builder.CreateICmpEQ(Counter, - Constant::getNullValue(Int64Ty->getPointerTo())); + Constant::getNullValue( + Builder.getInt64Ty()->getPointerTo())); Builder.CreateCondBr(Cond, Exit, CounterEnd); // ++*counter; Builder.SetInsertPoint(CounterEnd); Value *Add = Builder.CreateAdd(Builder.CreateLoad(Counter), - ConstantInt::get(Int64Ty, 1)); + Builder.getInt64(1)); Builder.CreateStore(Add, Counter); Builder.CreateBr(Exit); @@ -759,18 +797,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -795,8 +833,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. 
Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 80705af..fce6513 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -418,13 +418,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes; ValueMap<Value*, Value*> ShadowMap, OriginMap; bool InsertChecks; + bool LoadShadow; OwningPtr<VarArgHelper> VAHelper; - // An unfortunate workaround for asymmetric lowering of va_arg stuff. - // See a comment in visitCallSite for more details. - static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7 - static const unsigned AMD64FpEndOffset = 176; - struct ShadowOriginAndInsertPoint { Instruction *Shadow; Instruction *Origin; @@ -437,11 +433,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SmallVector<Instruction*, 16> StoreList; MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) - : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { - InsertChecks = !MS.BL->isIn(F); + : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { + LoadShadow = InsertChecks = + !MS.BL->isIn(F) && + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::SanitizeMemory); + DEBUG(if (!InsertChecks) - dbgs() << "MemorySanitizer is not inserting checks into '" - << F.getName() << "'\n"); + dbgs() << "MemorySanitizer is not inserting checks into '" + << F.getName() << "'\n"); } void materializeStores() { @@ -836,15 +836,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(&I); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); - setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + if (LoadShadow) { + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + setShadow(&I, + IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); + } else { + setShadow(&I, getCleanShadow(&I)); + } if (ClCheckAccessAddress) insertCheck(I.getPointerOperand(), &I); if (MS.TrackOrigins) { - unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); - setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment)); + if (LoadShadow) { + unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); + setOrigin(&I, + IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment)); + } else { + setOrigin(&I, getCleanOrigin()); + } } } @@ -1410,16 +1420,25 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Addr = I.getArgOperand(0); Type *ShadowTy = getShadowTy(&I); - Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); - // We don't know the pointer alignment (could be unaligned SSE load!). - // Have to assume to worst case. - setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); + if (LoadShadow) { + Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); + // We don't know the pointer alignment (could be unaligned SSE load!). + // Have to assume to worst case. 
+ setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, 1, "_msld")); + } else { + setShadow(&I, getCleanShadow(&I)); + } + if (ClCheckAccessAddress) insertCheck(Addr, &I); - if (MS.TrackOrigins) - setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); + if (MS.TrackOrigins) { + if (LoadShadow) + setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); + else + setOrigin(&I, getCleanOrigin()); + } return true; } diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index d71dd5d..015fd2e 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. - if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447..129af8d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! 
- patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d..14e463a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -60,14 +61,22 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +87,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that are marked as "used" + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -169,6 +195,46 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + // If we already processed a Module, UsedGlobalVariables may have been + // populated. Reset the information for this module. 
+ MustKeepGlobalVariables.clear(); + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +242,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +267,12 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + // - the ones used for EH + // - the ones marked with "used" attribute + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +294,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e90fe90..0e57e5c 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -69,112 +69,130 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { -public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - /// \brief The ending offset, not included in the range. 
- uint64_t EndOffset; + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } + /// \brief Support for ordering ranges. + /// + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } + /// \brief Support comparison with a single offset to allow binary searches. + friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; - /// \brief A partition of an alloca. +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. 
- bool IsSplittable; - - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; - } + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; - /// \brief A particular use of a partition of the alloca. +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} + + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. 
+ Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} + +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +474,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +492,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. 
- bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +514,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +525,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +817,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. - iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +832,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. 
Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +848,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -1089,21 +1109,21 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); - } else { + else return 0; // Bail if we have weird uses. - } if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter @@ -1140,11 +1160,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1375,11 +1396,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. - visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1523,8 +1544,8 @@ private: // inside the load. 
AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1571,16 +1592,16 @@ private: void visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // If the select isn't safe to speculate, just use simple logic to emit it. SmallVector<LoadInst *, 4> Loads; if (!isSafeSelectToSpeculate(SI, Loads)) return; + IRBuilder<> IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1591,7 +1612,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1623,8 +1644,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1911,6 +1932,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1939,6 +1964,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1977,7 +2006,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1997,24 +2027,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? 
Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2063,7 +2093,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2074,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2089,7 +2120,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2105,16 +2136,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2297,6 +2328,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. 
uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; @@ -2314,7 +2346,7 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2336,14 +2368,15 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2450,29 +2483,12 @@ private: DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + IRBuilder<> IRB(&LI); + Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { @@ -2492,16 +2508,15 @@ private: } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. 
IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2588,14 +2603,12 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, getName(".extract")); @@ -3381,7 +3394,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 00cda8e..1f517d0 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F, Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); // Compare operand values and branch - Value *ZeroV = MainBuilder.getInt32(0); + Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0); Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); @@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &F, // Get bitwidth of div/rem instruction IntegerType *T = cast<IntegerType>(J->getType()); - int bitwidth = T->getBitWidth(); + unsigned int bitwidth = T->getBitWidth(); // Continue if bitwidth is not bypassed DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index a309bce..63d7a1d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -94,9 +94,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, //Some arguments were deleted with the VMap. Copy arguments one by one for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast<Argument>(VMap[I])) - Anew->addAttr(OldFunc->getAttributes() - .getParamAttributes(I->getArgNo() + 1)); + if (Argument* Anew = dyn_cast<Argument>(VMap[I])) { + AttributeSet attrs = OldFunc->getAttributes() + .getParamAttributes(I->getArgNo() + 1); + if (attrs.getNumSlots() > 0) + Anew->addAttr(attrs); + } NewFunc->setAttributes(NewFunc->getAttributes() .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex, diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index a63d31d..681bf9c 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2789,9 +2789,20 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Turn all invokes that unwind here into calls and delete the basic block. 
+ bool InvokeRequiresTableEntry = false; + bool Changed = false; for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { InvokeInst *II = cast<InvokeInst>((*PI++)->getTerminator()); + + if (II->hasFnAttr(Attribute::UWTable)) { + // Don't remove an `invoke' instruction if the ABI requires an entry into + // the table. + InvokeRequiresTableEntry = true; + continue; + } + SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); + // Insert a call instruction before the invoke. CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); Call->takeName(II); @@ -2811,11 +2822,14 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { // Finally, delete the invoke instruction! II->eraseFromParent(); + Changed = true; } - // The landingpad is now unreachable. Zap it. - BB->eraseFromParent(); - return true; + if (!InvokeRequiresTableEntry) + // The landingpad is now unreachable. Zap it. + BB->eraseFromParent(); + + return Changed; } bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { @@ -3944,11 +3958,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) { // Load from null is undefined. if (LoadInst *LI = dyn_cast<LoadInst>(Use)) - return LI->getPointerAddressSpace() == 0; + if (!LI->isVolatile()) + return LI->getPointerAddressSpace() == 0; // Store to null is undefined. if (StoreInst *SI = dyn_cast<StoreInst>(Use)) - return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; + if (!SI->isVolatile()) + return SI->getPointerAddressSpace() == 0 && SI->getPointerOperand() == I; } return false; } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 8ad566c..c231704 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -15,14 +15,17 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Allocator.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" @@ -797,8 +800,7 @@ struct StrToOpt : public LibCallOptimization { if (isa<ConstantPointerNull>(EndPtr)) { // With a null EndPtr, this function won't capture the main argument. // It would be readonly too, except that it still may write to errno. - CI->addAttribute(1, Attribute::get(Callee->getContext(), - Attribute::NoCapture)); + CI->addAttribute(1, Attribute::NoCapture); } return 0; @@ -1672,67 +1674,16 @@ class LibCallSimplifierImpl { const TargetLibraryInfo *TLI; const LibCallSimplifier *LCS; bool UnsafeFPShrink; - StringMap<LibCallOptimization*> Optimizations; - - // Fortified library call optimizations. - MemCpyChkOpt MemCpyChk; - MemMoveChkOpt MemMoveChk; - MemSetChkOpt MemSetChk; - StrCpyChkOpt StrCpyChk; - StpCpyChkOpt StpCpyChk; - StrNCpyChkOpt StrNCpyChk; - - // String library call optimizations. 
- StrCatOpt StrCat; - StrNCatOpt StrNCat; - StrChrOpt StrChr; - StrRChrOpt StrRChr; - StrCmpOpt StrCmp; - StrNCmpOpt StrNCmp; - StrCpyOpt StrCpy; - StpCpyOpt StpCpy; - StrNCpyOpt StrNCpy; - StrLenOpt StrLen; - StrPBrkOpt StrPBrk; - StrToOpt StrTo; - StrSpnOpt StrSpn; - StrCSpnOpt StrCSpn; - StrStrOpt StrStr; - - // Memory library call optimizations. - MemCmpOpt MemCmp; - MemCpyOpt MemCpy; - MemMoveOpt MemMove; - MemSetOpt MemSet; // Math library call optimizations. - UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; - - // Integer library call optimizations. - FFSOpt FFS; - AbsOpt Abs; - IsDigitOpt IsDigit; - IsAsciiOpt IsAscii; - ToAsciiOpt ToAscii; - - // Formatting and IO library call optimizations. - PrintFOpt PrintF; - SPrintFOpt SPrintF; - FPrintFOpt FPrintF; - FWriteOpt FWrite; - FPutsOpt FPuts; - PutsOpt Puts; - - void initOptimizations(); - void addOpt(LibFunc::Func F, LibCallOptimization* Opt); - void addOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + CosOpt Cos; + PowOpt Pow; + Exp2Opt Exp2; public: LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI, const LibCallSimplifier *LCS, bool UnsafeFPShrink = false) - : UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true), - Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { + : Cos(UnsafeFPShrink), Pow(UnsafeFPShrink), Exp2(UnsafeFPShrink) { this->TD = TD; this->TLI = TLI; this->LCS = LCS; @@ -1740,126 +1691,234 @@ public: } Value *optimizeCall(CallInst *CI); + LibCallOptimization *lookupOptimization(CallInst *CI); + bool hasFloatVersion(StringRef FuncName); }; -void LibCallSimplifierImpl::initOptimizations() { - // Fortified library call optimizations. - Optimizations["__memcpy_chk"] = &MemCpyChk; - Optimizations["__memmove_chk"] = &MemMoveChk; - Optimizations["__memset_chk"] = &MemSetChk; - Optimizations["__strcpy_chk"] = &StrCpyChk; - Optimizations["__stpcpy_chk"] = &StpCpyChk; - Optimizations["__strncpy_chk"] = &StrNCpyChk; - Optimizations["__stpncpy_chk"] = &StrNCpyChk; - - // String library call optimizations. - addOpt(LibFunc::strcat, &StrCat); - addOpt(LibFunc::strncat, &StrNCat); - addOpt(LibFunc::strchr, &StrChr); - addOpt(LibFunc::strrchr, &StrRChr); - addOpt(LibFunc::strcmp, &StrCmp); - addOpt(LibFunc::strncmp, &StrNCmp); - addOpt(LibFunc::strcpy, &StrCpy); - addOpt(LibFunc::stpcpy, &StpCpy); - addOpt(LibFunc::strncpy, &StrNCpy); - addOpt(LibFunc::strlen, &StrLen); - addOpt(LibFunc::strpbrk, &StrPBrk); - addOpt(LibFunc::strtol, &StrTo); - addOpt(LibFunc::strtod, &StrTo); - addOpt(LibFunc::strtof, &StrTo); - addOpt(LibFunc::strtoul, &StrTo); - addOpt(LibFunc::strtoll, &StrTo); - addOpt(LibFunc::strtold, &StrTo); - addOpt(LibFunc::strtoull, &StrTo); - addOpt(LibFunc::strspn, &StrSpn); - addOpt(LibFunc::strcspn, &StrCSpn); - addOpt(LibFunc::strstr, &StrStr); - - // Memory library call optimizations. - addOpt(LibFunc::memcmp, &MemCmp); - addOpt(LibFunc::memcpy, &MemCpy); - addOpt(LibFunc::memmove, &MemMove); - addOpt(LibFunc::memset, &MemSet); +bool LibCallSimplifierImpl::hasFloatVersion(StringRef FuncName) { + LibFunc::Func Func; + SmallString<20> FloatFuncName = FuncName; + FloatFuncName += 'f'; + if (TLI->getLibFunc(FloatFuncName, Func)) + return TLI->has(Func); + return false; +} - // Math library call optimizations. 
- addOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); - addOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); - addOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); - addOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); - addOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); - addOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); - addOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); - - if(UnsafeFPShrink) { - addOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); - addOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); +// Fortified library call optimizations. +static MemCpyChkOpt MemCpyChk; +static MemMoveChkOpt MemMoveChk; +static MemSetChkOpt MemSetChk; +static StrCpyChkOpt StrCpyChk; +static StpCpyChkOpt StpCpyChk; +static StrNCpyChkOpt StrNCpyChk; + +// String library call optimizations. +static StrCatOpt StrCat; +static StrNCatOpt StrNCat; +static StrChrOpt StrChr; +static StrRChrOpt StrRChr; +static StrCmpOpt StrCmp; +static StrNCmpOpt StrNCmp; +static StrCpyOpt StrCpy; +static StpCpyOpt StpCpy; +static StrNCpyOpt StrNCpy; +static StrLenOpt StrLen; +static StrPBrkOpt StrPBrk; +static StrToOpt StrTo; +static StrSpnOpt StrSpn; +static StrCSpnOpt StrCSpn; +static StrStrOpt StrStr; + +// Memory library call optimizations. +static MemCmpOpt MemCmp; +static MemCpyOpt MemCpy; +static MemMoveOpt MemMove; +static MemSetOpt MemSet; + +// Math library call optimizations. +static UnaryDoubleFPOpt UnaryDoubleFP(false); +static UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + + // Integer library call optimizations. +static FFSOpt FFS; +static AbsOpt Abs; +static IsDigitOpt IsDigit; +static IsAsciiOpt IsAscii; +static ToAsciiOpt ToAscii; + +// Formatting and IO library call optimizations. +static PrintFOpt PrintF; +static SPrintFOpt SPrintF; +static FPrintFOpt FPrintF; +static FWriteOpt FWrite; +static FPutsOpt FPuts; +static PutsOpt Puts; + +LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { + LibFunc::Func Func; + Function *Callee = CI->getCalledFunction(); + StringRef FuncName = Callee->getName(); + + // Next check for intrinsics. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::pow: + return &Pow; + case Intrinsic::exp2: + return &Exp2; + default: + return 0; + } } - addOpt(LibFunc::cosf, &Cos); - addOpt(LibFunc::cos, &Cos); - addOpt(LibFunc::cosl, &Cos); - addOpt(LibFunc::powf, &Pow); - addOpt(LibFunc::pow, &Pow); - addOpt(LibFunc::powl, &Pow); - Optimizations["llvm.pow.f32"] = &Pow; - Optimizations["llvm.pow.f64"] = &Pow; - Optimizations["llvm.pow.f80"] = &Pow; - Optimizations["llvm.pow.f128"] = &Pow; - Optimizations["llvm.pow.ppcf128"] = &Pow; - addOpt(LibFunc::exp2l, &Exp2); - addOpt(LibFunc::exp2, &Exp2); - addOpt(LibFunc::exp2f, &Exp2); - Optimizations["llvm.exp2.ppcf128"] = &Exp2; - Optimizations["llvm.exp2.f128"] = &Exp2; - Optimizations["llvm.exp2.f80"] = &Exp2; - Optimizations["llvm.exp2.f64"] = &Exp2; - Optimizations["llvm.exp2.f32"] = &Exp2; + // Then check for known library functions. + if (TLI->getLibFunc(FuncName, Func) && TLI->has(Func)) { + switch (Func) { + case LibFunc::strcat: + return &StrCat; + case LibFunc::strncat: + return &StrNCat; + case LibFunc::strchr: + return &StrChr; + case LibFunc::strrchr: + return &StrRChr; + case LibFunc::strcmp: + return &StrCmp; + case LibFunc::strncmp: + return &StrNCmp; + case LibFunc::strcpy: + return &StrCpy; + case LibFunc::stpcpy: + return &StpCpy; + case LibFunc::strncpy: + return &StrNCpy; + case LibFunc::strlen: + return &StrLen; + case LibFunc::strpbrk: + return &StrPBrk; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + return &StrTo; + case LibFunc::strspn: + return &StrSpn; + case LibFunc::strcspn: + return &StrCSpn; + case LibFunc::strstr: + return &StrStr; + case LibFunc::memcmp: + return &MemCmp; + case LibFunc::memcpy: + return &MemCpy; + case LibFunc::memmove: + return &MemMove; + case LibFunc::memset: + return &MemSet; + case LibFunc::cosf: + case LibFunc::cos: + case LibFunc::cosl: + return &Cos; + case LibFunc::powf: + case LibFunc::pow: + case LibFunc::powl: + return &Pow; + case LibFunc::exp2l: + case LibFunc::exp2: + case LibFunc::exp2f: + return &Exp2; + case LibFunc::ffs: + case LibFunc::ffsl: + case LibFunc::ffsll: + return &FFS; + case LibFunc::abs: + case LibFunc::labs: + case LibFunc::llabs: + return &Abs; + case LibFunc::isdigit: + return &IsDigit; + case LibFunc::isascii: + return &IsAscii; + case LibFunc::toascii: + return &ToAscii; + case LibFunc::printf: + return &PrintF; + case LibFunc::sprintf: + return &SPrintF; + case LibFunc::fprintf: + return &FPrintF; + case LibFunc::fwrite: + return &FWrite; + case LibFunc::fputs: + return &FPuts; + case LibFunc::puts: + return &Puts; + case LibFunc::ceil: + case LibFunc::fabs: + case LibFunc::floor: + case LibFunc::rint: + case LibFunc::round: + case LibFunc::nearbyint: + case LibFunc::trunc: + if (hasFloatVersion(FuncName)) + return &UnaryDoubleFP; + return 0; + case LibFunc::acos: + case LibFunc::acosh: + case LibFunc::asin: + case LibFunc::asinh: + case LibFunc::atan: + case LibFunc::atanh: + case LibFunc::cbrt: + case LibFunc::cosh: + case LibFunc::exp: + case LibFunc::exp10: + case LibFunc::expm1: + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + case LibFunc::sin: + case LibFunc::sinh: + case LibFunc::sqrt: + case LibFunc::tan: + case LibFunc::tanh: + if (UnsafeFPShrink && hasFloatVersion(FuncName)) + return &UnsafeUnaryDoubleFP; + 
return 0; + case LibFunc::memcpy_chk: + return &MemCpyChk; + default: + return 0; + } + } + + // Finally check for fortified library calls. + if (FuncName.endswith("_chk")) { + if (FuncName == "__memmove_chk") + return &MemMoveChk; + else if (FuncName == "__memset_chk") + return &MemSetChk; + else if (FuncName == "__strcpy_chk") + return &StrCpyChk; + else if (FuncName == "__stpcpy_chk") + return &StpCpyChk; + else if (FuncName == "__strncpy_chk") + return &StrNCpyChk; + else if (FuncName == "__stpncpy_chk") + return &StrNCpyChk; + } + + return 0; - // Integer library call optimizations. - addOpt(LibFunc::ffs, &FFS); - addOpt(LibFunc::ffsl, &FFS); - addOpt(LibFunc::ffsll, &FFS); - addOpt(LibFunc::abs, &Abs); - addOpt(LibFunc::labs, &Abs); - addOpt(LibFunc::llabs, &Abs); - addOpt(LibFunc::isdigit, &IsDigit); - addOpt(LibFunc::isascii, &IsAscii); - addOpt(LibFunc::toascii, &ToAscii); - - // Formatting and IO library call optimizations. - addOpt(LibFunc::printf, &PrintF); - addOpt(LibFunc::sprintf, &SPrintF); - addOpt(LibFunc::fprintf, &FPrintF); - addOpt(LibFunc::fwrite, &FWrite); - addOpt(LibFunc::fputs, &FPuts); - addOpt(LibFunc::puts, &Puts); } Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { - if (Optimizations.empty()) - initOptimizations(); - - Function *Callee = CI->getCalledFunction(); - LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); + LibCallOptimization *LCO = lookupOptimization(CI); if (LCO) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); @@ -1867,17 +1926,6 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { return 0; } -void LibCallSimplifierImpl::addOpt(LibFunc::Func F, LibCallOptimization* Opt) { - if (TLI->has(F)) - Optimizations[TLI->getName(F)] = Opt; -} - -void LibCallSimplifierImpl::addOpt(LibFunc::Func F1, LibFunc::Func F2, - LibCallOptimization* Opt) { - if (TLI->has(F1) && TLI->has(F2)) - Optimizations[TLI->getName(F1)] = Opt; -} - LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI, bool UnsafeFPShrink) { diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 7636541..17900da 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -1771,7 +1771,7 @@ namespace { size_t MaxDepth = DAG.lookup(IJ); DEBUG(if (DebugPairSelection) dbgs() << "BBV: found DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << DAG.size() << "\n"); // At this point the DAG has been constructed, but, may contain @@ -2086,7 +2086,7 @@ namespace { DEBUG(if (DebugPairSelection) dbgs() << "BBV: found pruned DAG for pair {" - << IJ.first << " <-> " << IJ.second << "} of depth " << + << *IJ.first << " <-> " << *IJ.second << "} of depth " << MaxDepth << " and size " << PrunedDAG.size() << " (effective size: " << EffSize << ")\n"); if (((TTI && !UseChainDepthWithTI) || diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index f489393..930d9c4 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -79,6 +79,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -115,6 
+116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128; /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; +/// We use a metadata with this name to indicate that a scalar loop was +/// vectorized and that we don't need to re-vectorize it if we run into it +/// again. +static const char* +AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized"; + namespace { // Forward declarations. @@ -138,10 +145,11 @@ class LoopVectorizationCostModel; class InnerLoopVectorizer { public: InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, DataLayout *DL, unsigned VecWidth, + DominatorTree *DT, DataLayout *DL, + const TargetLibraryInfo *TLI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), VF(VecWidth), - UF(UnrollFactor), Builder(SE->getContext()), Induction(0), + : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), + VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0), OldInduction(0), WidenMap(UnrollFactor) {} // Perform the actual loop widening (vectorization). @@ -268,6 +276,9 @@ private: DominatorTree *DT; /// Data Layout. DataLayout *DL; + /// Target Library Info. + const TargetLibraryInfo *TLI; + /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. unsigned VF; @@ -320,8 +331,9 @@ class LoopVectorizationLegality { public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL, DominatorTree *DT, TargetTransformInfo* TTI, - AliasAnalysis* AA) - : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {} + AliasAnalysis *AA, TargetLibraryInfo *TLI) + : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), + Induction(0) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -407,7 +419,7 @@ public: /// Alias(Multi)Map stores the values (GEPs or underlying objects and their /// respective Store/Load instruction(s) to calculate aliasing. - typedef DenseMap<Value*, Instruction* > AliasMap; + typedef MapVector<Value*, Instruction* > AliasMap; typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap; /// Returns true if it is legal to vectorize this loop. @@ -504,6 +516,8 @@ private: TargetTransformInfo *TTI; /// Alias Analysis. AliasAnalysis *AA; + /// Target Library Info. + TargetLibraryInfo *TLI; // --- vectorization state --- // @@ -540,8 +554,8 @@ public: LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, LoopVectorizationLegality *Legal, const TargetTransformInfo &TTI, - DataLayout *DL) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL) {} + DataLayout *DL, const TargetLibraryInfo *TLI) + : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI) {} /// Information about vectorization costs struct VectorizationFactor { @@ -614,6 +628,8 @@ private: const TargetTransformInfo &TTI; /// Target data layout information. DataLayout *DL; + /// Target Library Info. + const TargetLibraryInfo *TLI; }; /// The LoopVectorize Pass. @@ -631,6 +647,7 @@ struct LoopVectorize : public LoopPass { TargetTransformInfo *TTI; DominatorTree *DT; AliasAnalysis *AA; + TargetLibraryInfo *TLI; virtual bool runOnLoop(Loop *L, LPPassManager &LPM) { // We only vectorize innermost loops. 
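The LoopVectorize hunks above and below thread a TargetLibraryInfo pointer through the pass so that plain libm calls such as sinf or floor, and not only calls that are already LLVM intrinsics, can be recognized and widened into vector intrinsics. A minimal sketch of that recognition step, assuming the LLVM 3.2-era TargetLibraryInfo interface already used in this patch; the helper name mapLibCallToIntrinsic and its reduced case list are illustrative only, the patch's own helper is getIntrinsicIDForCall:

    // Illustrative sketch, not part of the patch: map a call to the
    // equivalent LLVM intrinsic when TargetLibraryInfo confirms the
    // library function is available in this environment.
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/Target/TargetLibraryInfo.h"
    using namespace llvm;

    static Intrinsic::ID mapLibCallToIntrinsic(CallInst *CI,
                                               const TargetLibraryInfo *TLI) {
      // Calls that are already intrinsics can be used as-is (the real helper
      // additionally restricts this to a whitelist of vectorizable IDs).
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
        return II->getIntrinsicID();

      Function *F = CI->getCalledFunction();
      LibFunc::Func Func;
      if (!F || !TLI || !TLI->getLibFunc(F->getName(), Func) || !TLI->has(Func))
        return Intrinsic::not_intrinsic;

      switch (Func) {
      case LibFunc::sin:   case LibFunc::sinf:   case LibFunc::sinl:
        return Intrinsic::sin;
      case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl:
        return Intrinsic::floor;
      default:
        return Intrinsic::not_intrinsic; // Unknown or non-vectorizable call.
      }
    }

In the vectorized loop body, each recognized call is then re-emitted per unroll part as a call to the vector form obtained from Intrinsic::getDeclaration, as the later hunks in this file show.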
@@ -643,19 +660,20 @@ struct LoopVectorize : public LoopPass { TTI = &getAnalysis<TargetTransformInfo>(); DT = &getAnalysis<DominatorTree>(); AA = getAnalysisIfAvailable<AliasAnalysis>(); + TLI = getAnalysisIfAvailable<TargetLibraryInfo>(); DEBUG(dbgs() << "LV: Checking a loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA); + LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA, TLI); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing.\n"); return false; } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL); + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI); // Check the function attributes to find out if this function should be // optimized for size. @@ -689,7 +707,7 @@ struct LoopVectorize : public LoopPass { DEBUG(dbgs() << "LV: Unroll Factor is " << UF << "\n"); // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF.Width, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); DEBUG(verifyFunction(*L->getHeader()->getParent())); @@ -1147,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); + // Mark the old scalar loop with metadata that tells us not to vectorize this + // loop again if we run into it. + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>()); + OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); + // Some loops have a single integer induction variable, while other loops // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we @@ -1438,34 +1461,108 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) { } } -static bool -isTriviallyVectorizableIntrinsic(Instruction *Inst) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); - if (!II) - return false; - switch (II->getIntrinsicID()) { - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::fabs: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::pow: - case Intrinsic::fma: - case Intrinsic::fmuladd: - return true; - default: - return false; +static Intrinsic::ID +getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { + // If we have an intrinsic call, check if it is trivially vectorizable. 
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + switch (II->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log10: + case Intrinsic::log2: + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::pow: + case Intrinsic::fma: + case Intrinsic::fmuladd: + return II->getIntrinsicID(); + default: + return Intrinsic::not_intrinsic; + } } - return false; + + if (!TLI) + return Intrinsic::not_intrinsic; + + LibFunc::Func Func; + Function *F = CI->getCalledFunction(); + // We're going to make assumptions on the semantics of the functions, check + // that the target knows that it's available in this environment. + if (!F || !TLI->getLibFunc(F->getName(), Func)) + return Intrinsic::not_intrinsic; + + // Otherwise check if we have a call to a function that can be turned into a + // vector intrinsic. + switch (Func) { + default: + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + return Intrinsic::sin; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + return Intrinsic::cos; + case LibFunc::exp: + case LibFunc::expf: + case LibFunc::expl: + return Intrinsic::exp; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + return Intrinsic::exp2; + case LibFunc::log: + case LibFunc::logf: + case LibFunc::logl: + return Intrinsic::log; + case LibFunc::log10: + case LibFunc::log10f: + case LibFunc::log10l: + return Intrinsic::log10; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + return Intrinsic::log2; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + return Intrinsic::fabs; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + return Intrinsic::floor; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + return Intrinsic::ceil; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + return Intrinsic::trunc; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + return Intrinsic::rint; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + return Intrinsic::nearbyint; + case LibFunc::pow: + case LibFunc::powf: + case LibFunc::powl: + return Intrinsic::pow; + } + + return Intrinsic::not_intrinsic; } /// This function translates the reduction kind to an LLVM binary operator. @@ -1546,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // To do so, we need to generate the 'identity' vector and overide // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -1991,17 +2088,21 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case Instruction::Call: { - assert(isTriviallyVectorizableIntrinsic(it)); + // Ignore dbg intrinsics. 
+ if (isa<DbgInfoIntrinsic>(it)) + break; + Module *M = BB->getParent()->getParent(); - IntrinsicInst *II = cast<IntrinsicInst>(it); - Intrinsic::ID ID = II->getIntrinsicID(); + CallInst *CI = cast<CallInst>(it); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + assert(ID && "Not an intrinsic call!"); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector<Value*, 4> Args; - for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) { - VectorParts &Arg = getVectorValue(II->getArgOperand(i)); + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); Args.push_back(Arg[Part]); } - Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) }; + Type *Tys[] = { VectorType::get(CI->getType()->getScalarType(), VF) }; Function *F = Intrinsic::getDeclaration(M, ID, Tys); Entry[Part] = Builder.CreateCall(F, Args); } @@ -2138,6 +2239,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); + // If we marked the scalar loop as "already vectorized" then no need + // to vectorize it again. + if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { + DEBUG(dbgs() << "LV: This loop was vectorized before\n"); + return false; + } + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2220,9 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. + // We still don't handle functions. However, we can ignore dbg intrinsic + // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !isTriviallyVectorizableIntrinsic(it)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -2638,6 +2747,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Is this a bin op ? FoundBinOp |= !isa<PHINode>(Iter); + // Remember the current instruction. + Instruction *OldIter = Iter; + // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2663,7 +2775,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (isa<PHINode>(Iter) && isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && TheLoop->contains(U) && - Iter->getNumUses() > 1) + Iter->hasNUsesOrMore(2)) continue; // We can't have multiple inside users. @@ -2683,6 +2795,10 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, Iter = U; } + // If all uses were skipped this can't be a reduction variable. + if (Iter == OldIter) + return false; + // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. @@ -3152,6 +3268,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip dbg intrinsics. 
+ if (isa<DbgInfoIntrinsic>(it)) + continue; + unsigned C = getInstructionCost(it, VF); Cost += C; DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " << @@ -3218,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); @@ -3305,13 +3425,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { - assert(isTriviallyVectorizableIntrinsic(I)); - IntrinsicInst *II = cast<IntrinsicInst>(I); - Type *RetTy = ToVectorTy(II->getType(), VF); + CallInst *CI = cast<CallInst>(I); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + assert(ID && "Not an intrinsic call!"); + Type *RetTy = ToVectorTy(CI->getType(), VF); SmallVector<Type*, 4> Tys; - for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i) - Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF)); - return TTI.getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys); + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) + Tys.push_back(ToVectorTy(CI->getArgOperand(i)->getType(), VF)); + return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); } default: { // We are scalarizing the instruction. Return the cost of the scalar |