diff options
Diffstat (limited to 'lib')
254 files changed, 11957 insertions, 6183 deletions
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 2730ce6..b255ce6 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -1,4 +1,4 @@ -//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===// +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -----------===// // // The LLVM Compiler Infrastructure // @@ -78,6 +78,19 @@ static const uint32_t ZH_NONTAKEN_WEIGHT = 12; static const uint32_t FPH_TAKEN_WEIGHT = 20; static const uint32_t FPH_NONTAKEN_WEIGHT = 12; +/// \brief Invoke-terminating normal branch taken weight +/// +/// This is the weight for branching to the normal destination of an invoke +/// instruction. We expect this to happen most of the time. Set the weight to an +/// absurdly high value so that nested loops subsume it. +static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; + +/// \brief Invoke-terminating normal branch not-taken weight. +/// +/// This is the weight for branching to the unwind destination of an invoke +/// instruction. This is essentially never taken. +static const uint32_t IH_NONTAKEN_WEIGHT = 1; + // Standard weight value. Used when none of the heuristics set weight for // the edge. static const uint32_t NORMAL_WEIGHT = 16; @@ -371,6 +384,19 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { return true; } +bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { + InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()); + if (!II) + return false; + + BasicBlock *Normal = II->getNormalDest(); + BasicBlock *Unwind = II->getUnwindDest(); + + setEdgeWeight(BB, Normal, IH_TAKEN_WEIGHT); + setEdgeWeight(BB, Unwind, IH_NONTAKEN_WEIGHT); + return true; +} + void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<LoopInfo>(); AU.setPreservesAll(); @@ -397,7 +423,9 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) { continue; if (calcZeroHeuristics(*I)) continue; - calcFloatingPointHeuristics(*I); + if (calcFloatingPointHeuristics(*I)) + continue; + calcInvokeHeuristics(*I); } PostDominatedByUnreachable.clear(); diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 7ced848..f5e619c 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -358,17 +358,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, NumElts = AT->getNumElements(); else NumElts = cast<VectorType>(C->getType())->getNumElements(); - + for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, TD)) return false; - if (EltSize >= BytesLeft) + + uint64_t BytesWritten = EltSize - Offset; + assert(BytesWritten <= EltSize && "Not indexing into this element?"); + if (BytesWritten >= BytesLeft) return true; - + Offset = 0; - BytesLeft -= EltSize; - CurPtr += EltSize; + BytesLeft -= BytesWritten; + CurPtr += BytesWritten; } return true; } @@ -600,6 +603,22 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops, return C; } +/// Strip the pointer casts, but preserve the address space information. +static Constant* StripPtrCastKeepAS(Constant* Ptr) { + assert(Ptr->getType()->isPointerTy() && "Not a pointer type"); + PointerType *OldPtrTy = cast<PointerType>(Ptr->getType()); + Ptr = cast<Constant>(Ptr->stripPointerCasts()); + PointerType *NewPtrTy = cast<PointerType>(Ptr->getType()); + + // Preserve the address space number of the pointer. + if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { + NewPtrTy = NewPtrTy->getElementType()->getPointerTo( + OldPtrTy->getAddressSpace()); + Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy); + } + return Ptr; +} + /// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP /// constant expression, do so. static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, @@ -636,13 +655,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, } return 0; } - + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); APInt Offset = APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), makeArrayRef((Value **)Ops.data() + 1, Ops.size() - 1))); - Ptr = cast<Constant>(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); // If this is a GEP of a GEP, fold it all into a single GEP. while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { @@ -661,7 +680,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops, Ptr = cast<Constant>(GEP->getOperand(0)); Offset += APInt(BitWidth, TD->getIndexedOffset(Ptr->getType(), NestedOps)); - Ptr = cast<Constant>(Ptr->stripPointerCasts()); + Ptr = StripPtrCastKeepAS(Ptr); } // If the base value for this address is a literal integer value, fold the diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index a6bf4a8..bc1ecd2 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -797,9 +797,33 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; - // Subtract off one instruction per call argument as those will be free after - // inlining. - Cost -= CS.arg_size() * InlineConstants::InstrCost; + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. + for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // TargetData. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 16a9a04..379a35a 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1719,10 +1719,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ConstantInt::get(ITy, false); // A local identified object (alloca or noalias call) can't equal any - // incoming argument, unless they're both null. - if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) && - Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); + // incoming argument, unless they're both null or they belong to + // different functions. The latter happens during inlining. + if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr)) + if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr)) + if (LHSInst->getParent()->getParent() == RHSArg->getParent() && + Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); } // Assume that the constant null is on the right. @@ -1732,14 +1735,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, else if (Pred == CmpInst::ICMP_NE) return ConstantInt::get(ITy, true); } - } else if (isa<Argument>(LHSPtr)) { + } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) { RHSPtr = RHSPtr->stripInBoundsOffsets(); - // An alloca can't be equal to an argument. - if (isa<AllocaInst>(RHSPtr)) { - if (Pred == CmpInst::ICMP_EQ) - return ConstantInt::get(ITy, false); - else if (Pred == CmpInst::ICMP_NE) - return ConstantInt::get(ITy, true); + // An alloca can't be equal to an argument unless they come from separate + // functions via inlining. + if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) { + if (LHSArg->getParent() == RHSInst->getParent()->getParent()) { + if (Pred == CmpInst::ICMP_EQ) + return ConstantInt::get(ITy, false); + else if (Pred == CmpInst::ICMP_NE) + return ConstantInt::get(ITy, true); + } } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8d99ec3..b986b32 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -64,7 +64,7 @@ static const AllocFnsTy AllocationFnData[] = { {"realloc", ReallocLike, 2, 1, -1}, {"reallocf", ReallocLike, 2, 1, -1}, {"strdup", StrDupLike, 1, -1, -1}, - {"strndup", StrDupLike, 2, -1, -1} + {"strndup", StrDupLike, 2, 1, -1} }; @@ -358,11 +358,16 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const TargetData *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If we have already seen this instruction, bail out. Cycles can happen in + // unreachable code after constant propagation. + if (!SeenInsts.insert(I)) + return unknown(); - if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) - return visitGEPOperator(*GEP); - if (Instruction *I = dyn_cast<Instruction>(V)) + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) + return visitGEPOperator(*GEP); return visit(*I); + } if (Argument *A = dyn_cast<Argument>(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V)) @@ -371,9 +376,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast<UndefValue>(V)) return visitUndefValue(*UV); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless + if (CE->getOpcode() == Instruction::GetElementPtr) + return visitGEPOperator(cast<GEPOperator>(*CE)); + } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V << '\n'); @@ -414,8 +422,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { // handle strdup-like functions separately if (FnData->AllocTy == StrDupLike) { - // TODO - return unknown(); + APInt Size(IntTyBits, GetStringLength(CS.getArgument(0))); + if (!Size) + return unknown(); + + // strndup limits strlen + if (FnData->FstParam > 0) { + ConstantInt *Arg= dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); + if (!Arg) + return unknown(); + + APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits); + if (Size.ugt(MaxSize)) + Size = MaxSize + 1; + } + return std::make_pair(Size, Zero); } ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam)); @@ -512,8 +533,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const TargetData *TD, LLVMContext &Context) -: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)), -Visitor(TD, Context) { +: TD(TD), Context(Context), Builder(Context, TargetFolder(TD)) { IntTy = TD->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -538,6 +558,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { + ObjectSizeOffsetVisitor Visitor(TD, Context); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 7fb154d..059e574 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -227,13 +227,18 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Otherwise if the two calls don't interact (e.g. InstCS is readnone) // keep scanning. - break; + continue; default: return MemDepResult::getClobber(Inst); } } + + // If we could not obtain a pointer for the instruction and the instruction + // touches memory then assume that this is a dependency. + if (MR != AliasAnalysis::NoModRef) + return MemDepResult::getClobber(Inst); } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 5f4458b..868f483 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -262,22 +262,6 @@ Region::const_block_node_iterator Region::block_node_end() const { return GraphTraits<FlatIt<const Region*> >::nodes_end(this); } -Region::block_iterator Region::block_begin() { - return block_node_begin(); -} - -Region::block_iterator Region::block_end() { - return block_node_end(); -} - -Region::const_block_iterator Region::block_begin() const { - return block_node_begin(); -} - -Region::const_block_iterator Region::block_end() const { - return block_node_end(); -} - Region::element_iterator Region::element_begin() { return GraphTraits<Region*>::nodes_begin(this); } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f0f3b1c..a654648 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -5370,6 +5370,12 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { SqrtTerm *= B; SqrtTerm -= Four * (A * C); + if (SqrtTerm.isNegative()) { + // The loop is provably infinite. + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest // integer value or else APInt::sqrt() will assert. APInt SqrtVal(SqrtTerm.sqrt()); diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 68873e2..5cfc810 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -82,14 +82,9 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; At += sizeof(ArchiveMemberHeader); - // Extract the size and determine if the file is - // compressed or not (negative length). int flags = 0; int MemberSize = atoi(Hdr->size); - if (MemberSize < 0) { - flags |= ArchiveMember::CompressedFlag; - MemberSize = -MemberSize; - } + assert(MemberSize >= 0); // Check the size of the member for sanity if (At + MemberSize > End) { diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 9ef2943..ec6b4b8 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -204,7 +204,6 @@ Archive::writeMember( std::ofstream& ARFile, bool CreateSymbolTable, bool TruncateNames, - bool ShouldCompress, std::string* ErrMsg ) { @@ -349,7 +348,7 @@ Archive::writeSymbolTable(std::ofstream& ARFile) { // table, flattening the file names (no directories, 15 chars max) and // compressing each archive member. bool -Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, +Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, std::string* ErrMsg) { // Make sure they haven't opened up the file, not loaded it, @@ -394,7 +393,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // builds the symbol table, symTab. for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, - TruncateNames, Compress, ErrMsg)) { + TruncateNames, ErrMsg)) { TmpArchive.eraseFromDisk(); ArchiveFile.close(); return true; @@ -446,7 +445,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // compatibility with other ar(1) implementations as well as allowing the // archive to store both native .o and LLVM .bc files, both indexed. if (foreignST) { - if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { + if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { FinalFile.close(); TmpArchive.eraseFromDisk(); return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 670c1bb..e045804 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -456,11 +456,12 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); KEYWORD(linker_private_weak); - KEYWORD(linker_private_weak_def_auto); + KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility. KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); + KEYWORD(linkonce_odr_auto_hide); KEYWORD(weak); KEYWORD(weak_odr); KEYWORD(appending); @@ -553,6 +554,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(naked); KEYWORD(nonlazybind); KEYWORD(address_safety); + KEYWORD(ia_nsdialect); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 095b7c5..a9c7e98 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -184,12 +184,13 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_private: // OptionalLinkage case lltok::kw_linker_private: // OptionalLinkage case lltok::kw_linker_private_weak: // OptionalLinkage - case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage + case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat. case lltok::kw_internal: // OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage case lltok::kw_appending: // OptionalLinkage case lltok::kw_dllexport: // OptionalLinkage case lltok::kw_common: // OptionalLinkage @@ -576,8 +577,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && Linkage != GlobalValue::LinkerPrivateLinkage && - Linkage != GlobalValue::LinkerPrivateWeakLinkage && - Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage) + Linkage != GlobalValue::LinkerPrivateWeakLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -962,6 +962,7 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { case lltok::kw_naked: Attrs |= Attribute::Naked; break; case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break; case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break; + case lltok::kw_ia_nsdialect: Attrs |= Attribute::IANSDialect; break; case lltok::kw_alignstack: { unsigned Alignment; @@ -989,12 +990,12 @@ bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) { /// ::= 'private' /// ::= 'linker_private' /// ::= 'linker_private_weak' -/// ::= 'linker_private_weak_def_auto' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' +/// ::= 'linkonce_odr_auto_hide' /// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' @@ -1011,14 +1012,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_linker_private_weak: Res = GlobalValue::LinkerPrivateWeakLinkage; break; - case lltok::kw_linker_private_weak_def_auto: - Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage; - break; case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; + case lltok::kw_linkonce_odr_auto_hide: + case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat. + Res = GlobalValue::LinkOnceODRAutoHideLinkage; + break; case lltok::kw_available_externally: Res = GlobalValue::AvailableExternallyLinkage; break; @@ -2652,11 +2654,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::DLLExportLinkage: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 0461e7b..9fd63f2 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,8 +37,10 @@ namespace lltok { kw_global, kw_constant, kw_private, kw_linker_private, kw_linker_private_weak, - kw_linker_private_weak_def_auto, kw_internal, - kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility. + kw_internal, + kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide, + kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, @@ -105,6 +107,7 @@ namespace lltok { kw_naked, kw_nonlazybind, kw_address_safety, + kw_ia_nsdialect, kw_type, kw_opaque, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4ffee38..65fd52e 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -89,7 +89,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; case 14: return GlobalValue::LinkerPrivateWeakLinkage; - case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage; + case 15: return GlobalValue::LinkOnceODRAutoHideLinkage; } } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b1725f..1d2dfc3 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -365,7 +365,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; case GlobalValue::LinkerPrivateWeakLinkage: return 14; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15; + case GlobalValue::LinkOnceODRAutoHideLinkage: return 15; } llvm_unreachable("Invalid linkage"); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index b48b5af..7364f42 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -220,16 +220,16 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkerPrivateWeakDefAutoLinkage) + GlobalValue::LinkOnceODRAutoHideLinkage) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d231665..d30e5bb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing dwarf compile unit. +// This file contains support for constructing a dwarf compile unit. // //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d240389..2e189ad 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -61,6 +61,7 @@ add_llvm_library(LLVMCodeGen MachineSSAUpdater.cpp MachineScheduler.cpp MachineSink.cpp + MachineTraceMetrics.cpp MachineVerifier.cpp OcamlGC.cpp OptimizePHIs.cpp diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index 9840a40..f9347ef 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,12 +17,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" +#include "MachineTraceMetrics.h" #include "llvm/Function.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -30,6 +32,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" @@ -48,7 +51,10 @@ BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11")); -typedef SmallSetVector<MachineBasicBlock*, 8> BlockSetVector; +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); //===----------------------------------------------------------------------===// // SSAIfConv @@ -94,6 +100,12 @@ public: /// equal to Tail. bool isTriangle() const { return TBB == Tail || FBB == Tail; } + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + /// Information about each phi in the Tail block. struct PHIInfo { MachineInstr *PHI; @@ -132,6 +144,12 @@ private: /// Find a valid insertion point in Head. bool findInsertionPoint(); + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + public: /// runOnMachineFunction - Initialize per-function data structures. void runOnMachineFunction(MachineFunction &MF) { @@ -335,11 +353,7 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) return false; - // We could support additional Tail predecessors by updating phis instead of - // eliminating them. Let's see an example where it matters first. Tail = Succ0->succ_begin()[0]; - if (Tail->pred_size() != 2) - return false; // This is not a triangle. if (Tail != Succ1) { @@ -389,8 +403,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { // Any phis in the tail block must be convertible to selects. PHIs.clear(); - MachineBasicBlock *TPred = TBB == Tail ? Head : TBB; - MachineBasicBlock *FPred = FBB == Tail ? Head : FBB; + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); I != E && I->isPHI(); ++I) { PHIs.push_back(&*I); @@ -426,24 +440,18 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { if (!findInsertionPoint()) return false; + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; return true; } - -/// convertIf - Execute the if conversion after canConvertIf has determined the -/// feasibility. -/// -/// Any basic blocks erased will be added to RemovedBlocks. -/// -void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { - assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); - - // Move all instructions into Head, except for the terminators. - if (TBB != Tail) - Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - if (FBB != Tail) - Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); assert(FirstTerm != Head->end() && "No terminators"); DebugLoc HeadDL = FirstTerm->getDebugLoc(); @@ -459,6 +467,66 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { PI.PHI->eraseFromParent(); PI.PHI = 0; } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); // Fix up the CFG, temporarily leave Head without any successors. Head->removeSuccessor(TBB); @@ -470,6 +538,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { // Fix up Head's terminators. // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); TII->RemoveBranch(*Head); // Erase the now empty conditional blocks. It is likely that Head can fall @@ -484,7 +553,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { } assert(Head->succ_empty() && "Additional head successors?"); - if (Head->isLayoutSuccessor(Tail)) { + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { // Splice Tail onto the end of Head. DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() << " into head BB#" << Head->getNumber() << '\n'); @@ -512,9 +581,12 @@ namespace { class EarlyIfConverter : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MCSchedModel *SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; SSAIfConv IfConv; public: @@ -527,6 +599,8 @@ private: bool tryConvertIf(MachineBasicBlock*); void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); }; } // end anonymous namespace @@ -537,6 +611,7 @@ INITIALIZE_PASS_BEGIN(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) INITIALIZE_PASS_END(EarlyIfConverter, "early-ifcvt", "Early If Converter", false, false) @@ -546,6 +621,8 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -576,12 +653,117 @@ void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { Loops->removeBlock(Removed[i]); } +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel->MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. + MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + /// Attempt repeated if-conversion on MBB, return true if successful. /// bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool Changed = false; - while (IfConv.canConvertIf(MBB)) { + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { // If-convert MBB and update analyses. + invalidateTraces(); SmallVector<MachineBasicBlock*, 4> RemovedBlocks; IfConv.convertIf(RemovedBlocks); Changed = true; @@ -597,9 +779,12 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { << ((Value*)MF.getFunction())->getName() << '\n'); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + SchedModel = MF.getTarget().getInstrItineraryData()->SchedModel; MRI = &MF.getRegInfo(); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = 0; bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index b14afc2..7a17331 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -131,13 +131,16 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } else { TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + // Transfer the kill/dead flags, if needed. if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); + DEBUG(dbgs() << "subreg: " << *CopyMI); } DEBUG(dbgs() << '\n'); diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 01077db..0a795e6 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -160,7 +160,7 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { valnos.pop_back(); } while (!valnos.empty() && valnos.back()->isUnused()); } else { - ValNo->setIsUnused(true); + ValNo->markUnused(); } } @@ -667,9 +667,6 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { } } - // Merge the relevant flags. - V2->mergeFlags(V1); - // Now that V1 is dead, remove it. markValNoForDeletion(V1); @@ -737,9 +734,7 @@ void LiveInterval::print(raw_ostream &OS) const { } else { OS << vni->def; if (vni->isPHIDef()) - OS << "-phidef"; - if (vni->hasPHIKill()) - OS << "-phikill"; + OS << "-phi"; } } } @@ -827,14 +822,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - if (MO.isUse() && MO.isUndef()) - continue; // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = Idx.getRegSlot(MO.isUse()); - const VNInfo *VNI = LI.getVNInfoAt(Idx); - // FIXME: We should be able to assert(VNI) here, but the coalescer leaves - // dangling defs around. + LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. if (!VNI) continue; MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 819707f..d0f8ae1 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -38,7 +39,13 @@ #include <cmath> using namespace llvm; +// Switch to the new experimental algorithm for computing live intervals. +static cl::opt<bool> +NewLiveIntervals("new-live-intervals", cl::Hidden, + cl::desc("Use new algorithm forcomputing live intervals")); + char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -105,7 +112,19 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { AllocatableRegs = TRI->getAllocatableSet(fn); ReservedRegs = TRI->getReservedRegs(fn); - computeIntervals(); + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + if (NewLiveIntervals) { + // This is the new way of computing live intervals. + // It is independent of LiveVariables, and it can run at any time. + computeVirtRegs(); + computeRegMasks(); + } else { + // This is the old way of computing live intervals. + // It depends on LiveVariables. + computeIntervals(); + } computeLiveInRegUnits(); DEBUG(dump()); @@ -238,7 +257,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // new valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the @@ -266,7 +284,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, assert(getInstructionFromIndex(Start) == 0 && "PHI def index points at actual instruction."); ValNo = interval.getNextValue(Start, VNInfoAllocator); - ValNo->setIsPHIDef(true); } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); @@ -340,7 +357,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { llvm_unreachable("Multiply defined register"); @@ -442,6 +458,49 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses. +void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI->empty() && "Should only compute empty intervals."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->createDeadDefs(LI); + LRCalc->extendToUses(LI); +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + LiveInterval *LI = createInterval(Reg); + VirtRegIntervals[Reg] = LI; + computeVirtRegInterval(LI); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()]; + RMB.first = RegMaskSlots.size(); + for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); + MI != ME; ++MI) + for (MIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); + RegMaskBits.push_back(MO->getRegMask()); + } + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first;; + } +} + //===----------------------------------------------------------------------===// // Register Unit Liveness //===----------------------------------------------------------------------===// @@ -648,7 +707,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. - VNI->setIsUnused(true); + VNI->markUnused(); NewLI.removeRange(*LII); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; @@ -720,6 +779,25 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { return MBB1 == MBB2 ? MBB1 : NULL; } +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *PHI = *I; + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. @@ -744,7 +822,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - VN->setHasPHIKill(true); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index 9384075..d828f25 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -54,8 +54,7 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { .getRegSlot(I.getOperand().isEarlyClobber()); // Create the def in LI. This may find an existing def. - VNInfo *VNI = LI->createDeadDef(Idx, *Alloc); - VNI->setIsPHIDef(MI->isPHI()); + LI->createDeadDef(Idx, *Alloc); } } @@ -320,7 +319,6 @@ void LiveRangeCalc::updateSSA() { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); - VNI->setIsPHIDef(true); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 896fdbf..b4ce9aa 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -239,6 +239,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, // Collect virtual registers to be erased after MI is gone. SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; // Check for live intervals that may shrink for (MachineInstr::mop_iterator MOI = MI->operands_begin(), @@ -246,8 +247,12 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, if (!MOI->isReg()) continue; unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !LIS.isReserved(Reg)) + ReadsPhysRegs = true; continue; + } LiveInterval &LI = LIS.getInterval(Reg); // Shrink read registers, unless it is likely to be expensive and @@ -271,11 +276,30 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, } } - if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } // Erase any virtregs that are now empty and unused. There may be <undef> // uses around. Keep the empty live range in that case. diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ecc1e95..cf13dbd 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -109,7 +109,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { assert(N->getParent() != 0 && "machine instruction not in a basic block"); // Remove from the use/def lists. - N->RemoveRegOperandsFromUseLists(); + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(0); @@ -310,8 +311,11 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { if (!succ_empty()) { if (Indexes) OS << '\t'; OS << " Successors according to CFG:"; - for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { OS << " BB#" << (*SI)->getNumber(); + if (!Weights.empty()) + OS << '(' << *getWeightIterator(SI) << ')'; + } OS << '\n'; } } @@ -477,18 +481,42 @@ MachineBasicBlock::removeSuccessor(succ_iterator I) { void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New) { - uint32_t weight = 0; - succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old); + if (Old == New) + return; - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(SI); - weight = *WI; + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } } + assert(OldI != E && "Old is not a successor of this block"); + Old->removePredecessor(this); - // Update the successor information. - removeSuccessor(SI); - addSuccessor(New, weight); + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its weight instead of adding a duplicate edge. + if (!Weights.empty()) { + weight_iterator OldWI = getWeightIterator(OldI); + *getWeightIterator(NewI) += *OldWI; + Weights.erase(OldWI); + } + Successors.erase(OldI); } void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { @@ -507,14 +535,13 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t weight = 0; - + uint32_t Weight = 0; // If Weight list is empty it means we don't use it (disabled optimization). if (!fromMBB->Weights.empty()) - weight = *fromMBB->Weights.begin(); + Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, weight); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); } } @@ -526,7 +553,10 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { while (!fromMBB->succ_empty()) { MachineBasicBlock *Succ = *fromMBB->succ_begin(); - addSuccessor(Succ); + uint32_t Weight = 0; + if (!fromMBB->Weights.empty()) + Weight = *fromMBB->Weights.begin(); + addSuccessor(Succ, Weight); fromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. @@ -540,9 +570,12 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } } +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); - return I != Successors.end(); + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); } bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { @@ -909,12 +942,11 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { /// getSuccWeight - Return weight of the edge from this block to MBB. /// -uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const { +uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { if (Weights.empty()) return 0; - const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - return *getWeightIterator(I); + return *getWeightIterator(Succ); } /// getWeightIterator - Return wight iterator corresonding to the I successor diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 5a15f92..c4dca2c 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -985,8 +985,22 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher weight first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) + << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + } PrevBB->updateTerminator(); + } } // Fixup the last block. @@ -997,29 +1011,63 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Walk through the backedges of the function now that we have fully laid out // the basic blocks and align the destination of each backedge. We don't rely - // on the loop info here so that we can align backedges in unnatural CFGs and - // backedges that were introduced purely because of the loop rotations done - // during this layout pass. - // FIXME: This isn't quite right, we shouldn't align backedges that result - // from blocks being sunken below the exit block for the function. + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) return; // Don't care about loop alignment. + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. - SmallPtrSet<MachineBasicBlock *, 16> PreviousBlocks; - for (BlockChain::iterator BI = FunctionChain.begin(), + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (BlockChain::iterator BI = llvm::next(FunctionChain.begin()), BE = FunctionChain.end(); BI != BE; ++BI) { - PreviousBlocks.insert(*BI); - // Set alignment on the destination of all the back edges in the new - // ordering. - for (MachineBasicBlock::succ_iterator SI = (*BI)->succ_begin(), - SE = (*BI)->succ_end(); - SI != SE; ++SI) - if (PreviousBlocks.count(*SI)) - (*SI)->setAlignment(Align); + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(*BI); + if (!L) + continue; + + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(*BI); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = *llvm::prior(BI); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(*BI)) { + (*BI)->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, othe predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + (*BI)->setAlignment(Align); } } diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0cc1af0..4479211 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -38,7 +38,7 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight; } @@ -53,22 +53,30 @@ getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { Sum = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); Sum += Weight / Scale; } assert(Sum <= UINT32_MAX); return Sum; } -uint32_t -MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { uint32_t Weight = Src->getSuccWeight(Dst); if (!Weight) return DEFAULT_WEIGHT; return Weight; } +uint32_t MachineBranchProbabilityInfo:: +getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -82,7 +90,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { MachineBasicBlock *MaxSucc = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, *I); + uint32_t Weight = getEdgeWeight(MBB, I); if (Weight > MaxWeight) { MaxWeight = Weight; MaxSucc = *I; diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 9cfe9ab..896461f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -215,8 +215,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, if (MO.isDef() && (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - PhysRefs.insert(*AI); + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); if (MO.isDef()) PhysDefs.push_back(Reg); } @@ -324,6 +326,29 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, MachineInstr *CSMI, MachineInstr *MI) { // FIXME: Heuristics that works around the lack the live range splitting. + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineRegisterInfo::use_nodbg_iterator I =MRI->use_nodbg_begin(CSReg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + CSUses.insert(Use); + } + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *Use = &*I; + if (!CSUses.count(Use)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in // an immediate predecessor. We don't want to increase register pressure and // end up causing other computation to be spilled. @@ -394,6 +419,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { bool Changed = false; SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { MachineInstr *MI = &*I; ++I; @@ -463,15 +489,24 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // Check if it's profitable to perform this CSE. bool DoCSE = true; - unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; unsigned OldReg = MO.getReg(); unsigned NewReg = CSMI->getOperand(i).getReg(); - if (OldReg == NewReg) + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + if (OldReg == NewReg) { + --NumDefs; continue; + } assert(TargetRegisterInfo::isVirtualRegister(OldReg) && TargetRegisterInfo::isVirtualRegister(NewReg) && @@ -503,6 +538,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { MRI->clearKillFlags(CSEPairs[i].second); } + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i) + CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false); + if (CrossMBBPhysDef) { // Add physical register defs now coming in from a predecessor to MBB // livein list. @@ -526,6 +566,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { Exps.push_back(MI); } CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); } return Changed; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 8dada05..b166849 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -47,55 +47,6 @@ using namespace llvm; // MachineOperand Implementation //===----------------------------------------------------------------------===// -/// AddRegOperandToRegInfo - Add this register operand to the specified -/// MachineRegisterInfo. If it is null, then the next/prev fields should be -/// explicitly nulled out. -void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { - assert(isReg() && "Can only add reg operand to use lists"); - - // If the reginfo pointer is null, just explicitly null out or next/prev - // pointers, to ensure they are not garbage. - if (RegInfo == 0) { - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; - return; - } - - // Otherwise, add this operand to the head of the registers use/def list. - MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); - - // For SSA values, we prefer to keep the definition at the start of the list. - // we do this by skipping over the definition if it is at the head of the - // list. - if (*Head && (*Head)->isDef()) - Head = &(*Head)->Contents.Reg.Next; - - Contents.Reg.Next = *Head; - if (Contents.Reg.Next) { - assert(getReg() == Contents.Reg.Next->getReg() && - "Different regs on the same list!"); - Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; - } - - Contents.Reg.Prev = Head; - *Head = this; -} - -/// RemoveRegOperandFromRegInfo - Remove this register operand from the -/// MachineRegisterInfo it is linked with. -void MachineOperand::RemoveRegOperandFromRegInfo() { - assert(isOnRegUseList() && "Reg operand is not on a use list"); - // Unlink this from the doubly linked list of operands. - MachineOperand *NextOp = Contents.Reg.Next; - *Contents.Reg.Prev = NextOp; - if (NextOp) { - assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); - NextOp->Contents.Reg.Prev = Contents.Reg.Prev; - } - Contents.Reg.Prev = 0; - Contents.Reg.Next = 0; -} - void MachineOperand::setReg(unsigned Reg) { if (getReg() == Reg) return; // No change. @@ -105,9 +56,10 @@ void MachineOperand::setReg(unsigned Reg) { if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) { - RemoveRegOperandFromRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); SmallContents.RegNo = Reg; - AddRegOperandToRegInfo(&MF->getRegInfo()); + MRI.addRegOperandToUseList(this); return; } @@ -136,15 +88,36 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { setReg(Reg); } +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. void MachineOperand::ChangeToImmediate(int64_t ImmVal) { // If this operand is currently a register operand, and if this is in a // function, deregister the operand from the register's use/def list. - if (isReg() && getParent() && getParent()->getParent() && - getParent()->getParent()->getParent()) - RemoveRegOperandFromRegInfo(); + if (isReg() && isOnRegUseList()) + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); OpKind = MO_Immediate; Contents.ImmVal = ImmVal; @@ -156,24 +129,20 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - // If this operand is already a register operand, use setReg to update the + MachineRegisterInfo *RegInfo = 0; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the // register's use/def lists. - if (isReg()) { - assert(!isEarlyClobber()); - setReg(Reg); - } else { - // Otherwise, change this to a register and set the reg#. - OpKind = MO_Register; - SmallContents.RegNo = Reg; - - // If this operand is embedded in a function, add the operand to the - // register's use/def list. - if (MachineInstr *MI = getParent()) - if (MachineBasicBlock *MBB = MI->getParent()) - if (MachineFunction *MF = MBB->getParent()) - AddRegOperandToRegInfo(&MF->getRegInfo()); - } + if (RegInfo && isReg()) + RegInfo->removeRegOperandFromUseList(this); + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -182,7 +151,13 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsInternalRead = false; IsEarlyClobber = false; IsDebug = isDebug; - SubReg = 0; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); } /// isIdenticalTo - Return true if this operand is identical to the specified @@ -208,6 +183,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { case MachineOperand::MO_FrameIndex: return getIndex() == Other.getIndex(); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); case MachineOperand::MO_JumpTableIndex: return getIndex() == Other.getIndex(); @@ -245,6 +221,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) { case MachineOperand::MO_FrameIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), MO.getOffset()); case MachineOperand::MO_JumpTableIndex: @@ -353,6 +330,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; case MachineOperand::MO_JumpTableIndex: OS << "<jt#" << getIndex() << '>'; break; @@ -650,24 +632,21 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. -void MachineInstr::RemoveRegOperandsFromUseLists() { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); - } + MRI.removeRegOperandFromUseList(&Operands[i]); } /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. -void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(&RegInfo); - } + MRI.addRegOperandToUseList(&Operands[i]); } - /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list @@ -695,7 +674,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; if (RegInfo) - Operands[OpNo].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } @@ -712,7 +691,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); // Insert the new operand at OpNo. Operands.insert(Operands.begin() + OpNo, Op); @@ -723,13 +702,15 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (Reallocate) for (unsigned i = 0; i != OpNo; ++i) if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); // When adding a register operand, tell RegInfo about it. if (Operands[OpNo].isReg()) { - // Add the new operand to RegInfo, even when RegInfo is NULL. - // This will initialize the linked list pointers. - Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // Ensure isOnRegUseList() returns false, regardless of Op's status. + Operands[OpNo].Contents.Reg.Prev = 0; + // Add the new operand to RegInfo. + if (RegInfo) + RegInfo->addRegOperandToUseList(&Operands[OpNo]); // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) Operands[OpNo].setIsEarlyClobber(true); @@ -739,7 +720,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) { if (RegInfo) { for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { assert(Operands[i].isReg() && "Should only be an implicit reg!"); - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } @@ -749,12 +730,13 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// void MachineInstr::RemoveOperand(unsigned OpNo) { assert(OpNo < Operands.size() && "Invalid operand number"); + MachineRegisterInfo *RegInfo = getRegInfo(); // Special case removing the last one. if (OpNo == Operands.size()-1) { // If needed, remove from the reg def/use list. - if (Operands.back().isReg() && Operands.back().isOnRegUseList()) - Operands.back().RemoveRegOperandFromRegInfo(); + if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) + RegInfo->removeRegOperandFromUseList(&Operands.back()); Operands.pop_back(); return; @@ -763,11 +745,10 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { // Otherwise, we are removing an interior operand. If we have reginfo to // update, remove all operands that will be shifted down from their reg lists, // move everything down, then re-add them. - MachineRegisterInfo *RegInfo = getRegInfo(); if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].RemoveRegOperandFromRegInfo(); + RegInfo->removeRegOperandFromUseList(&Operands[i]); } } @@ -776,7 +757,7 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { if (RegInfo) { for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { if (Operands[i].isReg()) - Operands[i].AddRegOperandToRegInfo(RegInfo); + RegInfo->addRegOperandToUseList(&Operands[i]); } } } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 82e1235..5fb938f 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -102,17 +102,9 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // New virtual register number. unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); - - // Add a reg, but keep track of whether the vector reallocated or not. - const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); - void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); - - if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) - // The vector reallocated, handle this now. - HandleVRegListReallocation(); return Reg; } @@ -126,21 +118,68 @@ void MachineRegisterInfo::clearVirtRegs() { VRegInfo.clear(); } -/// HandleVRegListReallocation - We just added a virtual register to the -/// VRegInfo info list and it reallocated. Update the use/def lists info -/// pointers. -void MachineRegisterInfo::HandleVRegListReallocation() { - // The back pointers for the vreg lists point into the previous vector. - // Update them to point to their correct slots. - for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - MachineOperand *List = VRegInfo[Reg].second; - if (!List) continue; - // Update the back-pointer to be accurate once more. - List->Contents.Reg.Prev = &VRegInfo[Reg].second; +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = 0; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. + MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = 0; + Last->Contents.Reg.Next = MO; } } +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. + MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = 0; + MO->Contents.Reg.Next = 0; +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -178,13 +217,6 @@ MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { return &*I; } -bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); -} - bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { use_nodbg_iterator UI = use_nodbg_begin(RegNo); if (UI == use_nodbg_end()) diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index acb1ee6..076547a 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -42,7 +42,7 @@ MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, } MachineSSAUpdater::~MachineSSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1ce546b..bc383cb 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -99,6 +99,16 @@ namespace { bool PerformTrivialForwardCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); }; + + // SuccessorSorter - Sort Successors according to their loop depth. + struct SuccessorSorter { + SuccessorSorter(MachineLoopInfo *LoopInfo) : LI(LoopInfo) {} + bool operator()(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) const { + return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS); + } + MachineLoopInfo *LI; + }; } // end anonymous namespace char MachineSinking::ID = 0; @@ -526,8 +536,11 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // Otherwise, we should look at all the successors and decide which one // we should sink to. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + // We give successors with smaller loop depth higher priority. + SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end()); + std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI)); + for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(), + E = Succs.end(); SI != E; ++SI) { MachineBasicBlock *SuccBlock = *SI; bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..1a3aa60 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1153 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-trace-metrics" +#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" + +using namespace llvm; + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { + std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + TII = MF->getTarget().getInstrInfo(); + TRI = MF->getTarget().getRegisterInfo(); + ItinData = MF->getTarget().getInstrItineraryData(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + BlockInfo.resize(MF->getNumBlockIDs()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = 0; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = 0; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. +const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + // FIXME: Compute per-functional unit counts. + FBI->HasCalls = false; + unsigned InstrCount = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isTransient()) + continue; + ++InstrCount; + if (MI->isCall()) + FBI->HasCalls = true; + } + FBI->InstrCount = InstrCount; + return FBI; +} + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); +} + +// Virtual destructor serves as an anchor. +MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. +void MachineTraceMetrics::Ensemble:: +computeHeightResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + + // Compute resources for the current block. + TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + + // The trace tail is done. + if (!TBI->Succ) { + TBI->Tail = MBB->getNumber(); + return; + } + + // Compute from the block below. A post-order traversal ensures the + // predecessor is always computed first. + TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); + TBI->InstrHeight += SuccTBI->InstrHeight; + TBI->Tail = SuccTBI->Tail; +} + +// Check if depth resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getDepthResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidDepth() ? TBI : 0; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : 0; +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in a post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. +// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*); + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*); + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return 0; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = 0; + unsigned BestDepth = 0; + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + const MachineBasicBlock *Pred = *I; + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return 0; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = 0; + unsigned BestHeight = 0; + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + const MachineBasicBlock *Succ = *I; + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop. + if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. + switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To); + } +}; +} + +/// Compute the trace through MBB. +void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + typedef ipo_ext_iterator<const MachineBasicBlock*, LoopBounds> UpwardPO; + for (UpwardPO I = ipo_ext_begin(MBB, Bounds), E = ipo_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. + TBI.Pred = pickTracePred(*I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(*I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + typedef po_ext_iterator<const MachineBasicBlock*, LoopBounds> DownwardPO; + for (DownwardPO I = po_ext_begin(MBB, Bounds), E = po_ext_end(MBB, Bounds); + I != E; ++I) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(*I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(*I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_pred_iterator + I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. + for (MachineBasicBlock::const_succ_iterator + I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { + TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(*I); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. + for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); + I != E; ++I) + Cycles.erase(I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = &*DefI; + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. +static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + bool HasPhysRegs = false; + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO->readsReg()) + Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo())); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. +static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (ConstMIOperands MO(UseMI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO->isDef()) { + if (MO->isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(MO.getOperandNo()); + } else if (MO->isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO->readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo())); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { + unsigned DefOp = LiveDefOps[i]; + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. +unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + const LiveInReg &LIR = TBI.LiveIns[i]; + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.hasValidDepth() || DefTBI.Head != TBI.Head) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *UseMI = I; + + // Collect all data dependencies. + Deps.clear(); + if (UseMI->isPHI()) + getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (unsigned i = 0, e = Deps.size(); i != e; ++i) { + const DataDep &Dep = Deps[i]; + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. + if (!DepTBI.hasValidDepth() || DepTBI.Head != TBI.Head) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += MTM.TII->computeOperandLatency(MTM.ItinData, + Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp, + /* FindMin = */ false); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << *UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO->readsReg()) + ReadOps.push_back(MO.getOperandNo()); + if (!MO->isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. + if (I->MI) + DepHeight += TII->computeOperandLatency(ItinData, + MI, MO.getOperandNo(), + I->MI, I->Op); + else + // No UseMI. Just use the MI latency instead. + DepHeight += TII->getInstrLatency(ItinData, MI); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const InstrItineraryData *ItinData, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += TII->computeOperandLatency(ItinData, Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. + MIHeightMap::iterator I; + bool New; + tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. + if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that DefMI was used by Trace.back(), add it to the live-in lists +/// of all the blocks in Trace. Stop when reaching the block that contains +/// DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg LI = TBI.LiveIns[i]; + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + // Get dependencies from PHIs in the trace successor. + const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); + I != E && I->isPHI(); ++I) { + const MachineInstr *PHI = I; + Deps.clear(); + getPHIDeps(PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); + if (pushDepHeight(Deps.front(), PHI, Height, + Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps.front().DefMI, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.ItinData, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (unsigned i = 0, e = Deps.size(); i != e; ++i) + if (pushDepHeight(Deps[i], MI, Cycle, Heights, MTM.ItinData, MTM.TII)) + addLiveIns(Deps[i].DefMI, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { + LiveInReg &LIR = TBI.LiveIns[i]; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + // FIXME: Check cache tags, recompute as needed. + computeTrace(MBB); + computeInstrDepths(MBB); + computeInstrHeights(MBB); + return Trace(*this, BlockInfo[MBB->getNumber()]); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.TII->computeOperandLatency(TE.MTM.ItinData, + Dep.DefMI, Dep.DefOp, + PHI, Dep.UseOp, + /* FindMin = */ false); + return DepCycle; +} + +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // For now, we compute the resource depth from instruction count / issue + // width. Eventually, we should compute resource depth per functional unit + // and return the max. + unsigned Instrs = TBI.InstrDepth; + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +unsigned MachineTraceMetrics::Trace:: +getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks) const { + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) + Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + if (const MCSchedModel *Model = TE.MTM.ItinData->SchedModel) + if (Model->IssueWidth != 0) + return Instrs / Model->IssueWidth; + // Assume issue width 1 without a schedule model. + return Instrs; +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h new file mode 100644 index 0000000..c5b86f3 --- /dev/null +++ b/lib/CodeGen/MachineTraceMetrics.h @@ -0,0 +1,341 @@ +//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the MachineTraceMetrics analysis pass +// that estimates CPU resource usage and critical data dependency paths through +// preferred traces. This is useful for super-scalar CPUs where execution speed +// can be limited both by data dependencies and by limited execution resources. +// +// Out-of-order CPUs will often be executing instructions from multiple basic +// blocks at the same time. This makes it difficult to estimate the resource +// usage accurately in a single basic block. Resources can be estimated better +// by looking at a trace through the current basic block. +// +// For every block, the MachineTraceMetrics pass will pick a preferred trace +// that passes through the block. The trace is chosen based on loop structure, +// branch probabilities, and resource usage. The intention is to pick likely +// traces that would be the most affected by code transformations. +// +// It is expensive to compute a full arbitrary trace for every block, so to +// save some computations, traces are chosen to be convergent. This means that +// if the traces through basic blocks A and B ever cross when moving away from +// A and B, they never diverge again. This applies in both directions - If the +// traces meet above A and B, they won't diverge when going further back. +// +// Traces tend to align with loops. The trace through a block in an inner loop +// will begin at the loop entry block and end at a back edge. If there are +// nested loops, the trace may begin and end at those instead. +// +// For each trace, we compute the critical path length, which is the number of +// cycles required to execute the trace when execution is limited by data +// dependencies only. We also compute the resource height, which is the number +// of cycles required to execute all instructions in the trace when ignoring +// data dependencies. +// +// Every instruction in the current block has a slack - the number of cycles +// execution of the instruction can be delayed without extending the critical +// path. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H +#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { + +class InstrItineraryData; +class MachineBasicBlock; +class MachineInstr; +class MachineLoop; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class raw_ostream; + +class MachineTraceMetrics : public MachineFunctionPass { + const MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *ItinData; + const MachineRegisterInfo *MRI; + const MachineLoopInfo *Loops; + +public: + class Ensemble; + class Trace; + static char ID; + MachineTraceMetrics(); + void getAnalysisUsage(AnalysisUsage&) const; + bool runOnMachineFunction(MachineFunction&); + void releaseMemory(); + void verifyAnalysis() const; + + friend class Ensemble; + friend class Trace; + + /// Per-basic block information that doesn't depend on the trace through the + /// block. + struct FixedBlockInfo { + /// The number of non-trivial instructions in the block. + /// Doesn't count PHI and COPY instructions that are likely to be removed. + unsigned InstrCount; + + /// True when the block contains calls. + bool HasCalls; + + FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} + + /// Returns true when resource information for this block has been computed. + bool hasResources() const { return InstrCount != ~0u; } + + /// Invalidate resource information. + void invalidate() { InstrCount = ~0u; } + }; + + /// Get the fixed resource information about MBB. Compute it on demand. + const FixedBlockInfo *getResources(const MachineBasicBlock*); + + /// A virtual register or regunit required by a basic block or its trace + /// successors. + struct LiveInReg { + /// The virtual register required, or a register unit. + unsigned Reg; + + /// For virtual registers: Minimum height of the defining instruction. + /// For regunits: Height of the highest user in the trace. + unsigned Height; + + LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} + }; + + /// Per-basic block information that relates to a specific trace through the + /// block. Convergent traces means that only one of these is required per + /// block in a trace ensemble. + struct TraceBlockInfo { + /// Trace predecessor, or NULL for the first block in the trace. + /// Valid when hasValidDepth(). + const MachineBasicBlock *Pred; + + /// Trace successor, or NULL for the last block in the trace. + /// Valid when hasValidHeight(). + const MachineBasicBlock *Succ; + + /// The block number of the head of the trace. (When hasValidDepth()). + unsigned Head; + + /// The block number of the tail of the trace. (When hasValidHeight()). + unsigned Tail; + + /// Accumulated number of instructions in the trace above this block. + /// Does not include instructions in this block. + unsigned InstrDepth; + + /// Accumulated number of instructions in the trace below this block. + /// Includes instructions in this block. + unsigned InstrHeight; + + TraceBlockInfo() : + Pred(0), Succ(0), + InstrDepth(~0u), InstrHeight(~0u), + HasValidInstrDepths(false), HasValidInstrHeights(false) {} + + /// Returns true if the depth resources have been computed from the trace + /// above this block. + bool hasValidDepth() const { return InstrDepth != ~0u; } + + /// Returns true if the height resources have been computed from the trace + /// below this block. + bool hasValidHeight() const { return InstrHeight != ~0u; } + + /// Invalidate depth resources when some block above this one has changed. + void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; } + + /// Invalidate height resources when a block below this one has changed. + void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } + + // Data-dependency-related information. Per-instruction depth and height + // are computed from data dependencies in the current trace, using + // itinerary data. + + /// Instruction depths have been computed. This implies hasValidDepth(). + bool HasValidInstrDepths; + + /// Instruction heights have been computed. This implies hasValidHeight(). + bool HasValidInstrHeights; + + /// Critical path length. This is the number of cycles in the longest data + /// dependency chain through the trace. This is only valid when both + /// HasValidInstrDepths and HasValidInstrHeights are set. + unsigned CriticalPath; + + /// Live-in registers. These registers are defined above the current block + /// and used by this block or a block below it. + /// This does not include PHI uses in the current block, but it does + /// include PHI uses in deeper blocks. + SmallVector<LiveInReg, 4> LiveIns; + + void print(raw_ostream&) const; + }; + + /// InstrCycles represents the cycle height and depth of an instruction in a + /// trace. + struct InstrCycles { + /// Earliest issue cycle as determined by data dependencies and instruction + /// latencies from the beginning of the trace. Data dependencies from + /// before the trace are not included. + unsigned Depth; + + /// Minimum number of cycles from this instruction is issued to the of the + /// trace, as determined by data dependencies and instruction latencies. + unsigned Height; + }; + + /// A trace represents a plausible sequence of executed basic blocks that + /// passes through the current basic block one. The Trace class serves as a + /// handle to internal cached data structures. + class Trace { + Ensemble &TE; + TraceBlockInfo &TBI; + + unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; } + + public: + explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {} + void print(raw_ostream&) const; + + /// Compute the total number of instructions in the trace. + unsigned getInstrCount() const { + return TBI.InstrDepth + TBI.InstrHeight; + } + + /// Return the resource depth of the top/bottom of the trace center block. + /// This is the number of cycles required to execute all instructions from + /// the trace head to the trace center block. The resource depth only + /// considers execution resources, it ignores data dependencies. + /// When Bottom is set, instructions in the trace center block are included. + unsigned getResourceDepth(bool Bottom) const; + + /// Return the resource length of the trace. This is the number of cycles + /// required to execute the instructions in the trace if they were all + /// independent, exposing the maximum instruction-level parallelism. + /// + /// Any blocks in Extrablocks are included as if they were part of the + /// trace. + unsigned getResourceLength(ArrayRef<const MachineBasicBlock*> Extrablocks = + ArrayRef<const MachineBasicBlock*>()) const; + + /// Return the length of the (data dependency) critical path through the + /// trace. + unsigned getCriticalPath() const { return TBI.CriticalPath; } + + /// Return the depth and height of MI. The depth is only valid for + /// instructions in or above the trace center block. The height is only + /// valid for instructions in or below the trace center block. + InstrCycles getInstrCycles(const MachineInstr *MI) const { + return TE.Cycles.lookup(MI); + } + + /// Return the slack of MI. This is the number of cycles MI can be delayed + /// before the critical path becomes longer. + /// MI must be an instruction in the trace center block. + unsigned getInstrSlack(const MachineInstr *MI) const; + + /// Return the Depth of a PHI instruction in a trace center block successor. + /// The PHI does not have to be part of the trace. + unsigned getPHIDepth(const MachineInstr *PHI) const; + }; + + /// A trace ensemble is a collection of traces selected using the same + /// strategy, for example 'minimum resource height'. There is one trace for + /// every block in the function. + class Ensemble { + SmallVector<TraceBlockInfo, 4> BlockInfo; + DenseMap<const MachineInstr*, InstrCycles> Cycles; + friend class Trace; + + void computeTrace(const MachineBasicBlock*); + void computeDepthResources(const MachineBasicBlock*); + void computeHeightResources(const MachineBasicBlock*); + unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); + void computeInstrDepths(const MachineBasicBlock*); + void computeInstrHeights(const MachineBasicBlock*); + void addLiveIns(const MachineInstr *DefMI, + ArrayRef<const MachineBasicBlock*> Trace); + + protected: + MachineTraceMetrics &MTM; + virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; + virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; + explicit Ensemble(MachineTraceMetrics*); + const MachineLoop *getLoopFor(const MachineBasicBlock*) const; + const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; + const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; + + public: + virtual ~Ensemble(); + virtual const char *getName() const =0; + void print(raw_ostream&) const; + void invalidate(const MachineBasicBlock *MBB); + void verify() const; + + /// Get the trace that passes through MBB. + /// The trace is computed on demand. + Trace getTrace(const MachineBasicBlock *MBB); + }; + + /// Strategies for selecting traces. + enum Strategy { + /// Select the trace through a block that has the fewest instructions. + TS_MinInstrCount, + + TS_NumStrategies + }; + + /// Get the trace ensemble representing the given trace selection strategy. + /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, + /// and valid for the lifetime of the analysis pass. + Ensemble *getEnsemble(Strategy); + + /// Invalidate cached information about MBB. This must be called *before* MBB + /// is erased, or the CFG is otherwise changed. + /// + /// This invalidates per-block information about resource usage for MBB only, + /// and it invalidates per-trace information for any trace that passes + /// through MBB. + /// + /// Call Ensemble::getTrace() again to update any trace handles. + void invalidate(const MachineBasicBlock *MBB); + +private: + // One entry per basic block, indexed by block number. + SmallVector<FixedBlockInfo, 4> BlockInfo; + + // One ensemble per strategy. + Ensemble* Ensembles[TS_NumStrategies]; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Trace &Tr) { + Tr.print(OS); + return OS; +} + +inline raw_ostream &operator<<(raw_ostream &OS, + const MachineTraceMetrics::Ensemble &En) { + En.print(OS); + return OS; +} +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index d8dece6..852c169 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -73,8 +73,10 @@ namespace { typedef SmallVector<const uint32_t*, 4> RegMaskVector; typedef DenseSet<unsigned> RegSet; typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; BitVector regsReserved; BitVector regsAllocatable; @@ -117,6 +119,9 @@ namespace { // block. This set is disjoint from regsLiveOut. RegSet vregsRequired; + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. Return true if @@ -203,6 +208,10 @@ namespace { void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); + void report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI); void checkLiveness(const MachineOperand *MO, unsigned MONum); void markReachable(const MachineBasicBlock *MBB); @@ -212,6 +221,10 @@ namespace { void calcRegsRequired(); void verifyLiveVariables(); void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); + void verifyLiveIntervalSegment(const LiveInterval&, + LiveInterval::const_iterator); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -350,9 +363,9 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getName() - << " " << (void*)MBB - << " (BB#" << MBB->getNumber() << ")"; + *OS << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (void*)MBB << ')'; if (Indexes) *OS << " [" << Indexes->getMBBStartIdx(MBB) << ';' << Indexes->getMBBEndIdx(MBB) << ')'; @@ -377,6 +390,28 @@ void MachineVerifier::report(const char *msg, *OS << "\n"; } +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveInterval &LI) { + report(msg, MF); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveInterval &LI) { + report(msg, MBB); + *OS << "- interval: "; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) + *OS << PrintReg(LI.reg, TRI); + else + *OS << PrintRegUnit(LI.reg, TRI); + *OS << ' ' << LI << '\n'; +} + void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { BBInfo &MInfo = MBBInfoMap[MBB]; if (!MInfo.reachable) { @@ -404,6 +439,22 @@ void MachineVerifier::visitMachineFunctionBefore() { regsAllocatable = TRI->getAllocatableSet(*MF); markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. + FunctionBlocks.clear(); + for (MachineFunction::const_iterator + I = MF->begin(), E = MF->end(); I != E; ++I) { + FunctionBlocks.insert(I); + BBInfo &MInfo = MBBInfoMap[I]; + + MInfo.Preds.insert(I->pred_begin(), I->pred_end()); + if (MInfo.Preds.size() != I->pred_size()) + report("MBB has duplicate entries in its predecessor list.", I); + + MInfo.Succs.insert(I->succ_begin(), I->succ_end()); + if (MInfo.Succs.size() != I->succ_size()) + report("MBB has duplicate entries in its successor list.", I); + } } // Does iterator point to a and b as the first two elements? @@ -440,6 +491,25 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { E = MBB->succ_end(); I != E; ++I) { if ((*I)->isLandingPad()) LandingPadSuccs.insert(*I); + if (!FunctionBlocks.count(*I)) + report("MBB has successor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Preds.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the predecessor list of the successor BB#" + << (*I)->getNumber() << ".\n"; + } + } + + // Check the predecessor list. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (!FunctionBlocks.count(*I)) + report("MBB has predecessor that isn't part of the function.", MBB); + if (!MBBInfoMap[*I].Succs.count(MBB)) { + report("Inconsistent CFG", MBB); + *OS << "MBB is not in the successor list of the predecessor BB#" + << (*I)->getNumber() << ".\n"; + } } const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); @@ -510,7 +580,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); - } if (MBB->succ_size() != 2) { + } if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (&*MBBI != TBB) + report("MBB exits via conditional branch/fall-through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/fall-through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { @@ -530,7 +608,15 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (TBB && FBB) { // Block conditionally branches somewhere, otherwise branches // somewhere else. - if (MBB->succ_size() != 2) { + if (MBB->succ_size() == 1) { + // A conditional branch with only one successor is weird, but allowed. + if (FBB != TBB) + report("MBB exits via conditional branch/branch through but only has " + "one CFG successor!", MBB); + else if (TBB != *MBB->succ_begin()) + report("MBB exits via conditional branch/branch through but the CFG " + "successor don't match the actual successor!", MBB); + } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/branch but doesn't have " "exactly two CFG successors!", MBB); } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) { @@ -651,10 +737,10 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); - const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // The first MCID.NumDefs operands must be explicit register defines if (MONum < MCID.getNumDefs()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); else if (!MO->isDef() && !MCOI.isOptionalDef()) @@ -662,6 +748,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { else if (MO->isImplicit()) report("Explicit definition marked as implicit", MO, MONum); } else if (MONum < MCID.getNumOperands()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // Don't check if it's the last operand in a variadic instruction. See, // e.g., LDM_RET in the arm back end. if (MO->isReg() && @@ -685,6 +772,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MRI->tracksLiveness() && !MI->isDebugValue()) checkLiveness(MO, MONum); + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); // Check register classes. if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { @@ -786,20 +879,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (MO->readsReg()) { regsLiveInButUnused.erase(Reg); - bool isKill = false; - unsigned defIdx; - if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { - // A two-addr use counts as a kill if use and def are the same. - unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) - isKill = true; - else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - report("Two-address instruction operands must be identical", MO, MONum); - } - } else - isKill = MO->isKill(); - - if (isKill) + if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); // Check that LiveVars knows this kill. @@ -811,23 +891,44 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { } // Check LiveInts liveness and kill. - if (TargetRegisterInfo::isVirtualRegister(Reg) && - LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); - if (LiveInts->hasInterval(Reg)) { - const LiveInterval &LI = LiveInts->getInterval(Reg); - if (!LI.liveAt(UseIdx)) { - report("No live range at use", MO, MONum); - *OS << UseIdx << " is not live in " << LI << '\n'; + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { + LiveRangeQuery LRQ(*LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LI << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + } + } } - // Check for extra kill flags. - // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { - report("Live range continues after kill flag", MO, MONum); - *OS << "Live range: " << LI << '\n'; + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveRangeQuery LRQ(LI, UseIdx); + if (!LRQ.valueIn()) { + report("No live range at use", MO, MONum); + *OS << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); } - } else { - report("Virtual register has no Live interval", MO, MONum); } } @@ -1124,281 +1225,282 @@ void MachineVerifier::verifyLiveIntervals() { const LiveInterval &LI = LiveInts->getInterval(Reg); assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) { - VNInfo *VNI = *I; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + // Verify all the cached regunit intervals. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) + verifyLiveInterval(*LI); +} - if (!DefVNI) { - if (!VNI->isUnused()) { - report("Valno not live at def and not marked unused", MF); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } - continue; - } +void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, + VNInfo *VNI) { + if (VNI->isUnused()) + return; - if (VNI->isUnused()) - continue; + const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); - if (DefVNI != VNI) { - report("Live range at def has different valno", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live in " << LI << '\n'; - continue; - } + if (!DefVNI) { + report("Valno not live at def and not marked unused", MF, LI); + *OS << "Valno #" << VNI->id << '\n'; + return; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); - if (!MBB) { - report("Invalid definition index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + if (DefVNI != VNI) { + report("Live range at def has different valno", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " where valno #" << DefVNI->id << " is live\n"; + return; + } - if (VNI->isPHIDef()) { - if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() - << " in " << LI << '\n'; - } - } else { - // Non-PHI def. - const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); - if (!MI) { - report("No instruction at def index", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid definition index", MF, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << " in " << LI << '\n'; + return; + } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->regsOverlap(LI.reg, MOI->getReg())) - continue; - } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef value is not defined at MBB start", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def + << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + } + return; + } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + // Non-PHI def. + const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at def index", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + return; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MF); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; - } - } + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + if (MOI->getReg() != LI.reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + continue; } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LI << '\n'; + } - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF); - I->print(*OS); - *OS << " has a valno not in " << LI << '\n'; - } + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LI); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } +} - if (VNI->isUnused()) { - report("Live range valno is marked unused", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - } +void +MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, + LiveInterval::const_iterator I) { + const VNInfo *VNI = I->valno; + assert(VNI && "Live range has no valno"); + + if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { + report("Foreign valno in live range", MF, LI); + *OS << *I << " has a bad valno\n"; + } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); - if (!MBB) { - report("Bad start of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } - SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; - } + if (VNI->isUnused()) { + report("Live range valno is marked unused", MF, LI); + *OS << *I << '\n'; + } - const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); - if (!EndMBB) { - report("Bad end of live segment, no basic block", MF); - I->print(*OS); - *OS << " in " << LI << '\n'; - continue; - } + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (I->start != MBBStartIdx && I->start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LI); + *OS << *I << '\n'; + } - // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) - continue; + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF, LI); + *OS << *I << '\n'; + return; + } - // The live segment is ending inside EndMBB - const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); - if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB); - I->print(*OS); - *OS << " in " << LI << '\n' << "Basic block starts at " - << MBBStartIdx << '\n'; + // No more checks for live-out segments. + if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && + I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB, LI); + *OS << *I << '\n'; + return; + } + + // The block slot must refer to a basic block boundary. + if (I->end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LI); + *OS << *I << '\n'; + } + + if (I->end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(I->start, I->end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (I->end.isEarlyClobber()) { + if (I+1 == LI.end() || (I+1)->start != I->end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB, LI); + *OS << *I << '\n'; + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + // A live range can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasDeadDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != LI.reg) continue; - } + if (MOI->readsReg()) + hasRead = true; + if (MOI->isDef() && MOI->isDead()) + hasDeadDef = true; + } - // The block slot must refer to a basic block boundary. - if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", MI); + if (I->end.isDead()) { + if (!hasDeadDef) { + report("Instruction doesn't have a dead def operand", MI); I->print(*OS); *OS << " in " << LI << '\n'; } - - if (I->end.isDead()) { - // Segment ends on the dead slot. - // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - - // A live segment can only end at an early-clobber slot if it is being - // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == E || (I+1)->start != I->end) { - report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } + } else { + if (!hasRead) { + report("Instruction ending live range doesn't read the register", MI); + *OS << *I << " in " << LI << '\n'; } + } + } - // The following checks only apply to virtual registers. Physreg liveness - // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a - // use, or a dead flag on a def. - bool hasRead = false; - bool hasDeadDef = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) - continue; - if (MOI->readsReg()) - hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; - } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { - if (!hasRead) { - report("Instruction ending live range doesn't read the register", - MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } - } + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } - // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { - // Not live-in to any blocks. - if (MBB == EndMBB) - continue; - // Skip this block. - ++MFI; + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI, LI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " + << PEnd << '\n'; + continue; } - for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); - // We don't know how to track physregs into a landing pad. - if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && - MFI->isLandingPad()) { - if (&*MFI == EndMBB) - break; - ++MFI; - continue; - } - // Is VNI a PHI-def in the current block? - bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); - - // Check that VNI is live-out of all predecessors. - for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), - PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); - - // All predecessors must have a live-out value. - if (!PVNI) { - report("Register not marked live out of predecessor", *PI); - *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << " in " << LI << '\n'; - continue; - } - - // Only PHI-defs can take different predecessor values. - if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI); - *OS << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << " in " - << PrintReg(Reg) << ": " << LI << '\n'; - } - } - if (&*MFI == EndMBB) - break; - ++MFI; + // Only PHI-defs can take different predecessor values. + if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI, LI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; } } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} - // Check the LI only has one connected component. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - ConnectedVNInfoEqClasses ConEQ(*LiveInts); - unsigned NumComp = ConEQ.Classify(&LI); - if (NumComp > 1) { - report("Multiple connected components in live interval", MF); - *OS << NumComp << " components in " << LI << '\n'; - for (unsigned comp = 0; comp != NumComp; ++comp) { - *OS << comp << ": valnos"; - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), - E = LI.vni_end(); I!=E; ++I) - if (comp == ConEQ.getEqClass(*I)) - *OS << ' ' << (*I)->id; - *OS << '\n'; - } +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I!=E; ++I) + verifyLiveIntervalValue(LI, *I); + + for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) + verifyLiveIntervalSegment(LI, I); + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF, LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; } } } diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 69d6d00..56526f2 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -88,6 +88,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); +// Experimental option to run live inteerval analysis early. +static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, + cl::desc("Run live interval analysis earlier in the pipeline")); + /// Allow standard passes to be disabled by command line options. This supports /// simple binary flags that either suppress the pass or do nothing. /// i.e. -disable-mypass=false has no effect. @@ -452,7 +456,8 @@ void TargetPassConfig::addMachinePasses() { printAndVerify("After Instruction Selection"); // Expand pseudo-instructions emitted by ISel. - addPass(&ExpandISelPseudosID); + if (addPass(&ExpandISelPseudosID)) + printAndVerify("After ExpandISelPseudos"); // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { @@ -648,6 +653,11 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { addPass(&MachineLoopInfoID); addPass(&PHIEliminationID); } + + // Eventually, we want to run LiveIntervals before PHI elimination. + if (EarlyLiveIntervals) + addPass(&LiveIntervalsID); + addPass(&TwoAddressInstructionPassID); if (EnableStrongPHIElim) diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 91c33c4..9099862 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -78,6 +78,8 @@ STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -108,12 +110,14 @@ namespace { bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool optimizeSelect(MachineInstr *MI); bool isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg); }; } @@ -384,6 +388,47 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, return false; } +/// Optimize a select instruction. +bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { + unsigned TrueOp = 0; + unsigned FalseOp = 0; + bool Optimizable = false; + SmallVector<MachineOperand, 4> Cond; + if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) + return false; + if (!Optimizable) + return false; + if (!TII->optimizeSelect(MI)) + return false; + MI->eraseFromParent(); + ++NumSelects; + return true; +} + +/// isLoadFoldable - Check whether MI is a candidate for folding into a later +/// instruction. We only fold loads to virtual registers and the virtual +/// register defined has a single use. +bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI, + unsigned &FoldAsLoadDefReg) { + if (!MI->canFoldAsLoad() || !MI->mayLoad()) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. + if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneUse(Reg)) { + FoldAsLoadDefReg = Reg; + return true; + } + return false; +} + bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { @@ -441,6 +486,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { SmallPtrSet<MachineInstr*, 8> LocalMIs; SmallSet<unsigned, 4> ImmDefRegs; DenseMap<unsigned, MachineInstr*> ImmDefMIs; + unsigned FoldAsLoadDefReg; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; @@ -448,37 +494,33 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { LocalMIs.clear(); ImmDefRegs.clear(); ImmDefMIs.clear(); + FoldAsLoadDefReg = 0; - bool First = true; - MachineBasicBlock::iterator PMII; for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; LocalMIs.insert(MI); + // If there exists an instruction which belongs to the following + // categories, we will discard the load candidate. if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() || MI->hasUnmodeledSideEffects()) { - ++MII; + FoldAsLoadDefReg = 0; continue; } - - if (MI->isBitcast()) { - if (optimizeBitcastInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } - } else if (MI->isCompare()) { - if (optimizeCmpInstr(MI, MBB)) { - // MI is deleted. - LocalMIs.erase(MI); - Changed = true; - MII = First ? I->begin() : llvm::next(PMII); - continue; - } + if (MI->mayStore() || MI->isCall()) + FoldAsLoadDefReg = 0; + + if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isSelect() && optimizeSelect(MI))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; } if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { @@ -489,9 +531,29 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } - First = false; - PMII = MII; - ++MII; + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. + if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) { + // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr + // can enable folding by converting SUB to CMP. + MachineInstr *DefMI = 0; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + ++NumLoadFold; + + // MI is replaced with FoldMI. + Changed = true; + continue; + } + } } } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8325f20..6b3a48e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -201,20 +201,16 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { /// its virtual register, and it is guaranteed to be a block-local register. /// bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { - // Check for non-debug uses or defs following MO. - // This is the most likely way to fail - fast path it. - MachineOperand *Next = &MO; - while ((Next = Next->getNextOperandForReg())) - if (!Next->isDebug()) - return false; - // If the register has ever been spilled or reloaded, we conservatively assume // it is a global register used in multiple blocks. if (StackSlotForVirtReg[MO.getReg()] != -1) return false; // Check that the use/def chain has exactly one operand - MO. - return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&I.getOperand() != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); } /// addKillFlag - Set kill flags on last use of a virtual register. diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6ac5428..d0cff48 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1747,7 +1747,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" << "********** Function: " - << ((Value*)mf.getFunction())->getName() << '\n'); + << mf.getFunction()->getName() << '\n'); MF = &mf; if (VerifyEnabled) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 733312f..9906334 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -460,14 +460,8 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) { - // If B1 is killed by a PHI, then the merged live range must also be killed - // by the same PHI, as B0 and B1 can not overlap. - bool HasPHIKill = BValNo->hasPHIKill(); + if (BValNo != ValLR->valno) IntB.MergeValueNumberInto(BValNo, ValLR->valno); - if (HasPHIKill) - ValLR->valno->setHasPHIKill(true); - } DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the @@ -494,6 +488,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, VNInfo *AValNo, VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -558,10 +557,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); - - // If other defs can reach uses of this def, then it's not safe to perform - // the optimization. - if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) + if (AValNo->isPHIDef() || AValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); if (!DefMI) @@ -657,6 +653,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; + // Kill flags are no longer accurate. They are recomputed after RA. + UseMO.setIsKill(false); if (TargetRegisterInfo::isPhysicalRegister(NewReg)) UseMO.substPhysReg(NewReg, *TRI); else @@ -1093,6 +1091,11 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // register live range doesn't need to be accurate as long as all the // defs are there. + // Delete the identity copy. + MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + // We don't track kills for reserved registers. MRI->clearKillFlags(CP.getSrcReg()); @@ -1382,24 +1385,6 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { ++J; } - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), - E = LHSValsDefinedFromRHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned LHSValID = LHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[LHSValID]->setHasPHIKill(true); - } - - // Update kill info. Some live ranges are extended due to copy coalescing. - for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), - E = RHSValsDefinedFromLHS.end(); I != E; ++I) { - VNInfo *VNI = I->first; - unsigned RHSValID = RHSValNoAssignments[VNI->id]; - if (VNI->hasPHIKill()) - NewVNInfo[RHSValID]->setHasPHIKill(true); - } - // Clear kill flags where live ranges are extended. while (!LHSOldKills.empty()) LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 110f478..9c1dba3 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -411,12 +411,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { const MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); - // SSA defs do not have output/anti dependencies. + // Singly defined vregs do not have output/anti dependencies. // The current operand is a def, so we have at least one. - // - // FIXME: This optimization is disabled pending PR13112. - //if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end()) - // return; + // Check here if there are any others... + if (MRI.hasOneDef(Reg)) + return; // Add output dependence to the next nearest def of this vreg. // diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 747bc44..1c485a0 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -228,6 +228,9 @@ namespace { SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); SDValue visitFABS(SDNode *N); + SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -1140,6 +1143,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); case ISD::BR_CC: return visitBR_CC(N); case ISD::LOAD: return visitLOAD(N); @@ -5679,7 +5685,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { @@ -5704,6 +5710,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); // fold vector ops if (VT.isVector()) { @@ -5724,11 +5731,11 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1); + return DAG.getNode(ISD::FNEG, dl, VT, N1); } // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // If 'unsafe math' is enabled, fold @@ -5756,23 +5763,34 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegal(ISD::FMA, VT)) { + TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); + DAG.getNode(ISD::FNEG, dl, VT, N1)); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, - DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); } + + // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, N00), N01, + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } } return SDValue(); @@ -6231,6 +6249,42 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP && VT != MVT::ppcf128) + return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); @@ -7822,9 +7876,29 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the element type of the input vector is not the same as + // the output element type, make concat_vectors based on input element + // type and then bitcast it to the output vector type. + // + // In another words avoid nodes like this: + // <NODE> v16i8 = concat_vectors v4i16 v4i16 + // Replace it with this one: + // <NODE0> v8i16 = concat_vectors v4i16 v4i16 + // <NODE1> v16i8 = bitcast NODE0 + EVT ItemType = VecIn1.getValueType().getVectorElementType(); + if (ItemType != VT.getVectorElementType()) { + EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), + ItemType, + VecIn1.getValueType().getVectorNumElements()*2); + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + VecIn1 = DAG.getNode(ISD::BITCAST, dl, VT, VecIn1); + } else + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } // If VecIn2 is unused then change it to undef. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index e5ea6e6..683fac6 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" @@ -789,6 +790,17 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (const CallInst *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + } + // First, try doing target-independent selection. if (SelectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; @@ -1040,7 +1052,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo) +FastISel::FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), @@ -1049,7 +1062,8 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo) TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()) { + TRI(*TM.getRegisterInfo()), + LibInfo(libInfo) { } FastISel::~FastISel() {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 936c126..4488d27 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -411,6 +411,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), BA->getTargetFlags())); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), + TI->getOffset(), + TI->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b0776af..908ebb9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -428,7 +428,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, DebugLoc dl = LD->getDebugLoc(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); - if (TLI.isTypeLegal(intVT)) { + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { // Expand to a (misaligned) integer load of the same size, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), @@ -436,8 +436,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); - if (VT.isFloatingPoint() && LoadedVT != VT) - Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); ValResult = Result; ChainResult = Chain; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5384576..84e41fc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -61,6 +61,7 @@ namespace llvm { if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; if (isa<JumpTableSDNode>(Node)) return true; if (isa<ExternalSymbolSDNode>(Node)) return true; if (isa<BlockAddressSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b971b69..f4fe892 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -403,6 +403,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddPointer(GA->getGlobal()); ID.AddInteger(GA->getOffset()); ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); break; } case ISD::BasicBlock: @@ -438,16 +439,25 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(CP->getTargetFlags()); break; } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } case ISD::LOAD: { const LoadSDNode *LD = cast<LoadSDNode>(N); ID.AddInteger(LD->getMemoryVT().getRawBits()); ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); break; } case ISD::STORE: { const StoreSDNode *ST = cast<StoreSDNode>(N); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } case ISD::ATOMIC_CMP_SWAP: @@ -467,6 +477,12 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { const AtomicSDNode *AT = cast<AtomicSDNode>(N); ID.AddInteger(AT->getMemoryVT().getRawBits()); ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); break; } case ISD::VECTOR_SHUFFLE: { @@ -483,6 +499,10 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { break; } } // end switch (N->getOpcode()) + + // Target specific memory nodes could also have address spaces to check. + if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); } /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID @@ -1100,6 +1120,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1199,6 +1220,24 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); @@ -2444,6 +2483,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, case ISD::FABS: V.clearSign(); return getConstantFP(V, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, VT); + break; + } case ISD::FP_EXTEND: { bool ignored; // This can return overflow, underflow, or inexact; we don't care. @@ -3901,6 +3958,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -3973,6 +4031,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4029,6 +4088,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; AddNodeIDNode(ID, Opcode, VTs, Ops, 2); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void* IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<AtomicSDNode>(E)->refineAlignment(MMO); @@ -4106,6 +4166,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); @@ -4225,6 +4286,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -4314,6 +4376,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4381,6 +4444,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4405,6 +4469,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(ST->getMemoryVT().getRawBits()); ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8cbe818..f3cf758 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1601,7 +1601,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Update successor info addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -3460,7 +3463,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3490,7 +3493,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - EVT VT = EVT::getEVT(I.getValueOperand()->getType()); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() * 8 < VT.getSizeInBits()) report_fatal_error("Cannot generate unaligned atomic store"); @@ -4929,6 +4932,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + case Intrinsic::floor: + setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return 0; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5506,6 +5514,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + return true; +} void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. @@ -5536,150 +5560,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. - if (!F->hasLocalLinkage() && F->hasName()) { - StringRef Name = F->getName(); - if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") || - (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") || - (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) { + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: if (I.getNumArgOperands() == 2 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && I.getType() == I.getArgOperand(0)->getType() && - I.getType() == I.getArgOperand(1)->getType()) { + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } - } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") || - (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") || - (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) return; - } - } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") || - (LibInfo->has(LibFunc::sinf) && Name == "sinf") || - (LibInfo->has(LibFunc::sinl) && Name == "sinl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) return; - } - } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") || - (LibInfo->has(LibFunc::cosf) && Name == "cosf") || - (LibInfo->has(LibFunc::cosl) && Name == "cosl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) return; - } - } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") || - (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") || - (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + if (visitUnaryFloatCall(I, ISD::FSQRT)) return; - } - } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") || - (LibInfo->has(LibFunc::floorf) && Name == "floorf") || - (LibInfo->has(LibFunc::floorl) && Name == "floorl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) return; - } - } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") || - (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") || - (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) return; - } - } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") || - (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") || - (LibInfo->has(LibFunc::ceill) && Name == "ceill")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) return; - } - } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") || - (LibInfo->has(LibFunc::rintf) && Name == "rintf") || - (LibInfo->has(LibFunc::rintl) && Name == "rintl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) return; - } - } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") || - (LibInfo->has(LibFunc::truncf) && Name == "truncf") || - (LibInfo->has(LibFunc::truncl) && Name == "truncl")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::trunc: + case LibFunc::truncf: + case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) return; - } - } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") || - (LibInfo->has(LibFunc::log2f) && Name == "log2f") || - (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) return; - } - } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") || - (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") || - (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { - if (I.getNumArgOperands() == 1 && // Basic sanity checks. - I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType() && - I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), - Tmp.getValueType(), Tmp)); + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) return; - } - } else if (Name == "memcmp") { + break; + case LibFunc::memcmp: if (visitMemCmpCall(I)) return; + break; } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d0fde6f..4090002 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -520,6 +520,7 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9fc225f..13cd011 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; case ISD::BlockAddress: return "BlockAddress"; case ISD::INTRINSIC_WO_CHAIN: @@ -409,6 +410,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = CP->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { OS << "<"; const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 287c679..4e5e3ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -979,7 +979,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo); + FastIS = TLI.createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal<const Function*> RPOT(&Fn); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index dff9b2c..6820175 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2303,7 +2303,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. @@ -2333,7 +2333,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), @@ -2361,7 +2361,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC = NewC.lshr(ShiftBits); if (ShiftBits && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalize() ? + EVT ShiftTy = DCI.isBeforeLegalizeOps() ? getPointerTy() : getShiftAmountTy(N0.getValueType()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, @@ -2464,7 +2464,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond) + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 9a751c1..4a2b7ec 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -652,7 +652,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { // Adjust RegAssign if a register assignment is killed at VNI->def. We // want to avoid calculating the live range of the source register if // possible. - AssignI.find(VNI->def.getPrevSlot()); + AssignI.find(Def.getPrevSlot()); if (!AssignI.valid() || AssignI.start() >= Def) continue; // If MI doesn't kill the assigned register, just leave it. @@ -739,6 +739,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); assert(ParentVNI && "Parent not live at complement def"); @@ -812,6 +814,8 @@ void SplitEditor::hoistCopiesForSize() { for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); VI != VE; ++VI) { VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); const DomPair &Dom = NearestDom[ParentVNI->id]; if (!Dom.first || Dom.second == VNI->def) @@ -1047,8 +1051,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); - VNI->setIsPHIDef(ParentVNI->isPHIDef()); + defValue(RegIdx, ParentVNI, ParentVNI->def); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 43a6ad8..a04ac3f 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -28,15 +28,10 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Triple.h" using namespace llvm; -// SSPBufferSize - The lower bound for a buffer to be considered for stack -// smashing protection. -static cl::opt<unsigned> -SSPBufferSize("stack-protector-buffer-size", cl::init(8), - cl::desc("Lower bound for a buffer to be considered for " - "stack protection")); - namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -46,7 +41,7 @@ namespace { Function *F; Module *M; - DominatorTree* DT; + DominatorTree *DT; /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. @@ -60,6 +55,11 @@ namespace { /// check fails. BasicBlock *CreateFailBB(); + /// ContainsProtectableArray - Check whether the type either is an array or + /// contains an array of sufficient size so that we need stack protectors + /// for it. + bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. bool RequiresStackProtector() const; @@ -70,8 +70,8 @@ namespace { } StackProtector(const TargetLowering *tli) : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); + } virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); @@ -95,10 +95,43 @@ bool StackProtector::runOnFunction(Function &Fn) { DT = getAnalysisIfAvailable<DominatorTree>(); if (!RequiresStackProtector()) return false; - + return InsertStackProtectors(); } +/// ContainsProtectableArray - Check whether the type either is an array or +/// contains a char array of sufficient size so that we need stack protectors +/// for it. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { + if (!Ty) return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const TargetMachine &TM = TLI->getTargetMachine(); + if (!AT->getElementType()->isIntegerTy(8)) { + Triple Trip(TM.getTargetTriple()); + + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + if (InStruct || !Trip.isOSDarwin()) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (TM.Options.SSPBufferSize <= TLI->getTargetData()->getTypeAllocSize(AT)) + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) return false; + + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + if (ContainsProtectableArray(*I, true)) + return true; + + return false; +} + /// RequiresStackProtector - Check whether or not this function needs a stack /// protector based upon the stack protector level. The heuristic we use is to /// add a guard variable to functions that call alloca, and functions with @@ -110,8 +143,6 @@ bool StackProtector::RequiresStackProtector() const { if (!F->hasFnAttr(Attribute::StackProtect)) return false; - const TargetData *TD = TLI->getTargetData(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; @@ -123,11 +154,8 @@ bool StackProtector::RequiresStackProtector() const { // protectors. return true; - if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) - // If an array has more than SSPBufferSize bytes of allocated space, - // then we emit stack protectors. - if (SSPBufferSize <= TD->getTypeAllocSize(AT)) - return true; + if (ContainsProtectableArray(AI->getAllocatedType())) + return true; } } @@ -159,17 +187,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuardSlot = alloca i8* // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // + // PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); unsigned AddressSpace, Offset; if (TLI->getStackCookieLocation(AddressSpace, Offset)) { Constant *OffsetVal = ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, AddressSpace)); } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); } BasicBlock &Entry = F->getEntryBlock(); diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index c6fdc73..5b06195 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -672,8 +672,8 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LiveInterval &SrcInterval = LI->getInterval(SrcReg); SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); + (void)SrcVNI; assert(SrcVNI); - SrcVNI->setHasPHIKill(true); continue; } @@ -744,7 +744,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex PHIIndex = LI->getInstructionIndex(PHI); VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); - DestVNI->setIsPHIDef(true); // Prior to PHI elimination, the live ranges of PHIs begin at their defining // instruction. After PHI elimination, PHI instructions are replaced by VNs @@ -777,7 +776,6 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, LI->getVNInfoAllocator()); - CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, DestCopyIndex.getRegSlot(), CopyVNI)); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index a3d6771..ddee6b2 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -570,12 +570,12 @@ TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, } /// Return the default expected latency for a def based on it's opcode. -unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData, +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, const MachineInstr *DefMI) const { if (DefMI->mayLoad()) - return ItinData->SchedModel->LoadLatency; + return SchedModel->LoadLatency; if (isHighLatencyDef(DefMI->getOpcode())) - return ItinData->SchedModel->HighLatency; + return SchedModel->HighLatency; return 1; } @@ -638,7 +638,7 @@ static int computeDefOperandLatency( return 1; } else if(ItinData->isEmpty()) - return TII->defaultDefLatency(ItinData, DefMI); + return TII->defaultDefLatency(ItinData->SchedModel, DefMI); // ...operand lookup required return -1; @@ -669,7 +669,8 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } @@ -742,6 +743,7 @@ computeOperandLatency(const InstrItineraryData *ItinData, // Expected latency is the max of the stage latency and itinerary props. if (!FindMin) - InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI)); + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); return InstrLatency; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e4c0119..aa601af 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -30,6 +30,7 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -55,18 +56,19 @@ STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); -STATISTIC(NumReMats, "Number of instructions re-materialized"); -STATISTIC(NumDeletes, "Number of dead instructions deleted"); STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; + SlotIndexes *Indexes; + LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,16 +94,9 @@ namespace { unsigned Reg, MachineBasicBlock::iterator OldPos); - bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc); - bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist, unsigned &LastDef); - MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB, - unsigned Dist); - bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, MachineBasicBlock *MBB, unsigned Dist); @@ -117,14 +112,6 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegA, unsigned RegB, unsigned Dist); - typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill; - bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist); - bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, unsigned Dist); - bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI, MachineBasicBlock *MBB); @@ -150,6 +137,11 @@ namespace { void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &Processed); + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg); /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part @@ -167,6 +159,8 @@ namespace { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); AU.addPreservedID(MachineLoopInfoID); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); @@ -241,7 +235,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, // appropriate location, we can try to sink the current instruction // past it. if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || - KillMI->isTerminator()) + KillMI == OldPos || KillMI->isTerminator()) return false; // If any of the definitions are used by another instruction between the @@ -284,6 +278,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, } } } + assert(KillMO && "Didn't find kill"); // Update kill and LV information. KillMO->setIsKill(false); @@ -297,59 +292,13 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MBB->remove(MI); MBB->insert(KillPos, MI); + if (LIS) + LIS->handleMove(MI); + ++Num3AddrSunk; return true; } -/// isTwoAddrUse - Return true if the specified MI is using the specified -/// register as a two-address operand. -static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) { - const MCInstrDesc &MCID = UseMI->getDesc(); - for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { - MachineOperand &MO = UseMI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && - (MO.isDef() || UseMI->isRegTiedToDefOperand(i))) - // Earlier use is a two-address one. - return true; - } - return false; -} - -/// isProfitableToReMat - Return true if the heuristics determines it is likely -/// to be profitable to re-materialize the definition of Reg rather than copy -/// the register. -bool -TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg, - const TargetRegisterClass *RC, - MachineInstr *MI, MachineInstr *DefMI, - MachineBasicBlock *MBB, unsigned Loc) { - bool OtherUse = false; - for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - MachineInstr *UseMI = UseMO.getParent(); - MachineBasicBlock *UseMBB = UseMI->getParent(); - if (UseMBB == MBB) { - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end() && DI->second == Loc) - continue; // Current use. - OtherUse = true; - // There is at least one other use in the MBB that will clobber the - // register. - if (isTwoAddrUse(UseMI, Reg)) - return true; - } - } - - // If other uses in MBB are not two-address uses, then don't remat. - if (OtherUse) - return false; - - // No other uses in the same block, remat if it's defined in the same - // block so it does not unnecessarily extend the live range. - return MBB == DefMI->getParent(); -} - /// NoUseAfterLastDef - Return true if there are no intervening uses between the /// last instruction in the MBB that defines the specified register and the /// two-address instruction which is being processed. It also returns the last @@ -377,31 +326,6 @@ bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg, return !(LastUse > LastDef && LastUse < Dist); } -MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg, - MachineBasicBlock *MBB, - unsigned Dist) { - unsigned LastUseDist = 0; - MachineInstr *LastUse = 0; - for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg), - E = MRI->reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - MachineInstr *MI = MO.getParent(); - if (MI->getParent() != MBB || MI->isDebugValue()) - continue; - DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); - if (DI == DistanceMap.end()) - continue; - if (DI->second >= Dist) - continue; - - if (MO.isUse() && DI->second > LastUseDist) { - LastUse = DI->first; - LastUseDist = DI->second; - } - } - return LastUse; -} - /// isCopyToReg - Return true if the specified MI is a copy instruction or /// a extract_subreg instruction. It also returns the source and destination /// registers and whether they are physical registers by reference. @@ -538,7 +462,7 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToReMat - Return true if it's potentially profitable to commute +/// isProfitableToCommute - Return true if it's potentially profitable to commute /// the two-address instruction that's being processed. bool TwoAddressInstructionPass::isProfitableToCommute(unsigned regA, unsigned regB, @@ -628,6 +552,8 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, if (LV) // Update live variables LV->replaceKillInstruction(RegC, MI, NewMI); + if (Indexes) + Indexes->replaceMachineInstrInMaps(MI, NewMI); mbbi->insert(mi, NewMI); // Insert the new inst mbbi->erase(mi); // Nuke the old inst. @@ -676,6 +602,9 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; + if (Indexes) + Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't // uses RegB, convertToThreeAddress must have created more @@ -785,92 +714,6 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, return; } -/// isSafeToDelete - If the specified instruction does not produce any side -/// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, - const TargetInstrInfo *TII, - SmallVector<unsigned, 4> &Kills) { - if (MI->mayStore() || MI->isCall()) - return false; - if (MI->isTerminator() || MI->hasUnmodeledSideEffects()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - Kills.push_back(MO.getReg()); - } - return true; -} - -/// canUpdateDeletedKills - Check if all the registers listed in Kills are -/// killed by instructions in MBB preceding the current instruction at -/// position Dist. If so, return true and record information about the -/// preceding kills in NewKills. -bool TwoAddressInstructionPass:: -canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills, - SmallVector<NewKill, 4> &NewKills, - MachineBasicBlock *MBB, unsigned Dist) { - while (!Kills.empty()) { - unsigned Kill = Kills.back(); - Kills.pop_back(); - if (TargetRegisterInfo::isPhysicalRegister(Kill)) - return false; - - MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist); - if (!LastKill) - return false; - - bool isModRef = LastKill->definesRegister(Kill); - NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef), - LastKill)); - } - return true; -} - -/// DeleteUnusedInstr - If an instruction with a tied register operand can -/// be safely deleted, just delete it. -bool -TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, - MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned Dist) { - // Check if the instruction has no side effects and if all its defs are dead. - SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, TII, Kills)) - return false; - - // If this instruction kills some virtual registers, we need to - // update the kill information. If it's not possible to do so, - // then bail out. - SmallVector<NewKill, 4> NewKills; - if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist)) - return false; - - if (LV) { - while (!NewKills.empty()) { - MachineInstr *NewKill = NewKills.back().second; - unsigned Kill = NewKills.back().first.first; - bool isDead = NewKills.back().first.second; - NewKills.pop_back(); - if (LV->removeVirtualRegisterKilled(Kill, mi)) { - if (isDead) - LV->addVirtualRegisterDead(Kill, NewKill); - else - LV->addVirtualRegisterKilled(Kill, NewKill); - } - } - } - - mbbi->erase(mi); // Nuke the old inst. - mi = nmi; - return true; -} - /// RescheduleMIBelowKill - If there is one more local instruction that reads /// 'Reg' and it kills 'Reg, consider moving the instruction below the kill /// instruction in order to eliminate the need for the copy. @@ -1000,6 +843,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(MI); DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; @@ -1154,6 +999,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, // Update live variables LV->removeVirtualRegisterKilled(Reg, KillMI); LV->addVirtualRegisterKilled(Reg, MI); + if (LIS) + LIS->handleMove(KillMI); DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; @@ -1180,16 +1027,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - - // If regA is dead and the instruction can be deleted, just delete - // it so it doesn't clobber regB. bool regBKilled = isKilled(MI, regB, MRI, TII); - if (!regBKilled && MI.getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { - ++NumDeletes; - DEBUG(dbgs() << "\tdeleted unused instruction.\n"); - return true; // Done with this instruction." - } if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -1273,16 +1111,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, if (NewOpc != 0) { const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); if (UnfoldMCID.getNumDefs() == 1) { - MachineFunction &MF = *mbbi->getParent(); - // Unfold the load. DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TRI->getAllocatableClass( - TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, MF)); + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector<MachineInstr *, 2> NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, Reg, + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -1359,15 +1195,177 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands where found, including the trivial ones. +bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->isInlineAsm() ? + MI->getNumOperands() : MCID.getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned RegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + IsEarlyClobber |= DstMO.isEarlyClobber(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA).addReg(RegB); + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + SlotIndex CopyIdx; + if (Indexes) + CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + + DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, MRI->getRegClass(RegB)); + + MO.setReg(RegA); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + /// runOnMachineFunction - Reduce two-address instructions to two operands. /// -bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { - const TargetMachine &TM = MF.getTarget(); - MRI = &MF.getRegInfo(); +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); + Indexes = getAnalysisIfAvailable<SlotIndexes>(); LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); AA = &getAnalysis<AliasAnalysis>(); OptLevel = TM.getOptLevel(); @@ -1375,20 +1373,15 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); + << MF->getFunction()->getName() << '\n'); // This pass takes the function out of SSA form. MRI->leaveSSA(); - // ReMatRegs - Keep track of the registers whose def's are remat'ed. - BitVector ReMatRegs(MRI->getNumVirtRegs()); - - typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> > - TiedOperandMap; - TiedOperandMap TiedOperands(4); + TiedOperandMap TiedOperands; SmallPtrSet<MachineInstr*, 8> Processed; - for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { unsigned Dist = 0; DistanceMap.clear(); @@ -1407,50 +1400,21 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { if (mi->isRegSequence()) RegSequences.push_back(&*mi); - const MCInstrDesc &MCID = mi->getDesc(); - bool FirstTied = true; - DistanceMap.insert(std::make_pair(mi, ++Dist)); ProcessCopy(&*mi, &*mbbi, Processed); // First scan through all the tied register uses in this instruction // and record a list of pairs of tied operands for each register. - unsigned NumOps = mi->isInlineAsm() - ? mi->getNumOperands() : MCID.getNumOperands(); - for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { - unsigned DstIdx = 0; - if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) - continue; - - if (FirstTied) { - FirstTied = false; - ++NumTwoAddressInstrs; - DEBUG(dbgs() << '\t' << *mi); - } - - assert(mi->getOperand(SrcIdx).isReg() && - mi->getOperand(SrcIdx).getReg() && - mi->getOperand(SrcIdx).isUse() && - "two address instruction invalid"); - - unsigned regB = mi->getOperand(SrcIdx).getReg(); - - // Deal with <undef> uses immediately - simply rewrite the src operand. - if (mi->getOperand(SrcIdx).isUndef()) { - unsigned DstReg = mi->getOperand(DstIdx).getReg(); - // Constrain the DstReg register class if required. - if (TargetRegisterInfo::isVirtualRegister(DstReg)) - if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, - TRI, MF)) - MRI->constrainRegClass(DstReg, RC); - mi->getOperand(SrcIdx).setReg(DstReg); - DEBUG(dbgs() << "\t\trewrite undef:\t" << *mi); - continue; - } - TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx)); + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; } + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + // If the instruction has a single pair of tied operands, try some // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. @@ -1477,139 +1441,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // Now iterate over the information collected above. for (TiedOperandMap::iterator OI = TiedOperands.begin(), OE = TiedOperands.end(); OI != OE; ++OI) { - SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs = OI->second; - - bool IsEarlyClobber = false; - bool RemovedKillFlag = false; - bool AllUsesCopied = true; - unsigned LastCopiedReg = 0; - unsigned regB = OI->first; - for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { - unsigned SrcIdx = TiedPairs[tpi].first; - unsigned DstIdx = TiedPairs[tpi].second; - - const MachineOperand &DstMO = mi->getOperand(DstIdx); - unsigned regA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); - - // Grab regB from the instruction because it may have changed if the - // instruction was commuted. - regB = mi->getOperand(SrcIdx).getReg(); - - if (regA == regB) { - // The register is tied to multiple destinations (or else we would - // not have continued this far), but this use of the register - // already matches the tied destination. Leave it. - AllUsesCopied = false; - continue; - } - LastCopiedReg = regA; - - assert(TargetRegisterInfo::isVirtualRegister(regB) && - "cannot make instruction into two-address form"); - -#ifndef NDEBUG - // First, verify that we don't have a use of "a" in the instruction - // (a = b + a for example) because our transformation will not - // work. This should never occur because we are in SSA form. - for (unsigned i = 0; i != mi->getNumOperands(); ++i) - assert(i == DstIdx || - !mi->getOperand(i).isReg() || - mi->getOperand(i).getReg() != regA); -#endif - - // Emit a copy or rematerialize the definition. - bool isCopy = false; - const TargetRegisterClass *rc = MRI->getRegClass(regB); - MachineInstr *DefMI = MRI->getUniqueVRegDef(regB); - // If it's safe and profitable, remat the definition instead of - // copying it. - if (DefMI && - DefMI->isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, AA, regB) && - isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ - DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); - unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); - ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); - ++NumReMats; - } else { - BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), - regA).addReg(regB); - isCopy = true; - } - - // Update DistanceMap. - MachineBasicBlock::iterator prevMI = prior(mi); - DistanceMap.insert(std::make_pair(prevMI, Dist)); - DistanceMap[mi] = ++Dist; - - DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); - - MachineOperand &MO = mi->getOperand(SrcIdx); - assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && - "inconsistent operand info for 2-reg pass"); - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - - // Make sure regA is a legal regclass for the SrcIdx operand. - if (TargetRegisterInfo::isVirtualRegister(regA) && - TargetRegisterInfo::isVirtualRegister(regB)) - MRI->constrainRegClass(regA, MRI->getRegClass(regB)); - - MO.setReg(regA); - - if (isCopy) - // Propagate SrcRegMap. - SrcRegMap[regA] = regB; - } - - if (AllUsesCopied) { - if (!IsEarlyClobber) { - // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - if (MO.isKill()) { - MO.setIsKill(false); - RemovedKillFlag = true; - } - MO.setReg(LastCopiedReg); - } - } - } - - // Update live variables for regB. - if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) - LV->addVirtualRegisterKilled(regB, prior(mi)); - - } else if (RemovedKillFlag) { - // Some tied uses of regB matched their destination registers, so - // regB is still used in this instruction, but a kill flag was - // removed from a different tied use of regB, so now we need to add - // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); - if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { - MO.setIsKill(true); - break; - } - } - } - - // We didn't change anything if there was a single tied pair, and that - // pair didn't require copies. - if (AllUsesCopied || TiedPairs.size() > 1) { - MadeChange = true; - - // Schedule the source copy / remat inserted to form two-address - // instruction. FIXME: Does it matter the distance map may not be - // accurate after it's scheduled? - TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); - } - + processTiedPairs(mi, OI->second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } @@ -1634,15 +1466,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } - // Some remat'ed instructions are dead. - for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(i); - if (MRI->use_nodbg_empty(VReg)) { - MachineInstr *DefMI = MRI->getVRegDef(VReg); - DefMI->eraseFromParent(); - } - } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve // SSA form. It's now safe to de-SSA. MadeChange |= EliminateRegSequences(); diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index a4e0d8e..797662b 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -167,9 +167,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address, const DWARFDebugLine::LineTable *lineTable = getLineTableForCompileUnit(cu); if (lineTable) { // Get the index of the row we're looking for in the line table. - uint64_t hiPC = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned( - cu, DW_AT_high_pc, -1ULL); - uint32_t rowIndex = lineTable->lookupAddress(address, hiPC); + uint32_t rowIndex = lineTable->lookupAddress(address); if (rowIndex != -1U) { const DWARFDebugLine::Row &row = lineTable->Rows[rowIndex]; // Take file/line info from the line table. diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 117fa31..d99575d 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -95,14 +95,46 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { DWARFDebugLine::State::~State() {} void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { + if (Sequence::Empty) { + // Record the beginning of instruction sequence. + Sequence::Empty = false; + Sequence::LowPC = Address; + Sequence::FirstRowIndex = row; + } ++row; // Increase the row number. LineTable::appendRow(*this); + if (EndSequence) { + // Record the end of instruction sequence. + Sequence::HighPC = Address; + Sequence::LastRowIndex = row; + if (Sequence::isValid()) + LineTable::appendSequence(*this); + Sequence::reset(); + } Row::postAppend(); } +void DWARFDebugLine::State::finalize() { + row = DoneParsingLineTable; + if (!Sequence::Empty) { + fprintf(stderr, "warning: last sequence in debug line table is not" + "terminated!\n"); + } + // Sort all sequences so that address lookup will work faster. + if (!Sequences.empty()) { + std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); + // Note: actually, instruction address ranges of sequences should not + // overlap (in shared objects and executables). If they do, the address + // lookup would still work, though, but result would be ambiguous. + // We don't report warning in this case. For example, + // sometimes .so compiled from multiple object files contains a few + // rudimentary sequences for address ranges [0x0, 0xsomething). + } +} + DWARFDebugLine::DumpingState::~DumpingState() {} -void DWARFDebugLine::DumpingState::finalize(uint32_t offset) { +void DWARFDebugLine::DumpingState::finalize() { LineTable::dump(OS); } @@ -180,8 +212,9 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" " have ended at 0x%8.8x but it ended ad 0x%8.8x\n", prologue_offset, end_prologue_offset, *offset_ptr); + return false; } - return end_prologue_offset; + return true; } bool @@ -430,47 +463,53 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, } } - state.finalize(*offset_ptr); + state.finalize(); return end_offset; } -static bool findMatchingAddress(const DWARFDebugLine::Row& row1, - const DWARFDebugLine::Row& row2) { - return row1.Address < row2.Address; -} - uint32_t -DWARFDebugLine::LineTable::lookupAddress(uint64_t address, - uint64_t cu_high_pc) const { - uint32_t index = UINT32_MAX; - if (!Rows.empty()) { - // Use the lower_bound algorithm to perform a binary search since we know - // that our line table data is ordered by address. - DWARFDebugLine::Row row; - row.Address = address; - typedef std::vector<Row>::const_iterator iterator; - iterator begin_pos = Rows.begin(); - iterator end_pos = Rows.end(); - iterator pos = std::lower_bound(begin_pos, end_pos, row, - findMatchingAddress); - if (pos == end_pos) { - if (address < cu_high_pc) - return Rows.size()-1; - } else { - // Rely on fact that we are using a std::vector and we can do - // pointer arithmetic to find the row index (which will be one less - // that what we found since it will find the first position after - // the current address) since std::vector iterators are just - // pointers to the container type. - index = pos - begin_pos; - if (pos->Address > address) { - if (index > 0) - --index; - else - index = UINT32_MAX; - } - } +DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { + uint32_t unknown_index = UINT32_MAX; + if (Sequences.empty()) + return unknown_index; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + DWARFDebugLine::Sequence found_seq; + if (seq_pos == last_seq) { + found_seq = Sequences.back(); + } else if (seq_pos->LowPC == address) { + found_seq = *seq_pos; + } else { + if (seq_pos == first_seq) + return unknown_index; + found_seq = *(seq_pos - 1); + } + if (!found_seq.containsPC(address)) + return unknown_index; + // Search for instruction address in the rows describing the sequence. + // Rows are stored in a vector, so we may use arithmetical operations with + // iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + found_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + found_seq.LastRowIndex; + RowIter row_pos = std::lower_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + if (row_pos == last_row) { + return found_seq.LastRowIndex - 1; + } + uint32_t index = found_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos->Address > address) { + if (row_pos == first_row) + return unknown_index; + else + index--; } - return index; // Failed to find address. + return index; } diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index a8c0669..6382b45 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -88,6 +88,10 @@ public: void reset(bool default_is_stmt); void dump(raw_ostream &OS) const; + static bool orderByAddress(const Row& LHS, const Row& RHS) { + return LHS.Address < RHS.Address; + } + // The program-counter value corresponding to a machine instruction // generated by the compiler. uint64_t Address; @@ -125,21 +129,63 @@ public: EpilogueBegin:1; }; + // Represents a series of contiguous machine instructions. Line table for each + // compilation unit may consist of multiple sequences, which are not + // guaranteed to be in the order of ascending instruction address. + struct Sequence { + // Sequence describes instructions at address range [LowPC, HighPC) + // and is described by line table rows [FirstRowIndex, LastRowIndex). + uint64_t LowPC; + uint64_t HighPC; + unsigned FirstRowIndex; + unsigned LastRowIndex; + bool Empty; + + Sequence() { reset(); } + void reset() { + LowPC = 0; + HighPC = 0; + FirstRowIndex = 0; + LastRowIndex = 0; + Empty = true; + } + static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { + return LHS.LowPC < RHS.LowPC; + } + bool isValid() const { + return !Empty && (LowPC < HighPC) && (FirstRowIndex < LastRowIndex); + } + bool containsPC(uint64_t pc) const { + return (LowPC <= pc && pc < HighPC); + } + }; + struct LineTable { void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } + void appendSequence(const DWARFDebugLine::Sequence &sequence) { + Sequences.push_back(sequence); + } void clear() { Prologue.clear(); Rows.clear(); + Sequences.clear(); } - uint32_t lookupAddress(uint64_t address, uint64_t cu_high_pc) const; + // Returns the index of the row with file/line info for a given address, + // or -1 if there is no such row. + uint32_t lookupAddress(uint64_t address) const; void dump(raw_ostream &OS) const; struct Prologue Prologue; - std::vector<Row> Rows; + typedef std::vector<Row> RowVector; + typedef RowVector::const_iterator RowIter; + typedef std::vector<Sequence> SequenceVector; + typedef SequenceVector::const_iterator SequenceIter; + RowVector Rows; + SequenceVector Sequences; }; - struct State : public Row, public LineTable { + struct State : public Row, public Sequence, public LineTable { // Special row codes. enum { StartParsingLineTable = 0, @@ -150,8 +196,11 @@ public: virtual ~State(); virtual void appendRowToMatrix(uint32_t offset); - virtual void finalize(uint32_t offset) { row = DoneParsingLineTable; } - virtual void reset() { Row::reset(Prologue.DefaultIsStmt); } + virtual void finalize(); + virtual void reset() { + Row::reset(Prologue.DefaultIsStmt); + Sequence::reset(); + } // The row number that starts at zero for the prologue, and increases for // each row added to the matrix. @@ -161,7 +210,7 @@ public: struct DumpingState : public State { DumpingState(raw_ostream &OS) : OS(OS) {} virtual ~DumpingState(); - virtual void finalize(uint32_t offset); + virtual void finalize(); private: raw_ostream &OS; }; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index a744d0c..4afc900 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -501,7 +501,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } - if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) { + if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 && + ExecutionEngine::MCJITCtor == 0) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index a942299..97995ad 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -361,7 +361,7 @@ bool JIT::removeModule(Module *M) { MutexGuard locked(lock); - if (jitstate->getModule() == M) { + if (jitstate && jitstate->getModule() == M) { delete jitstate; jitstate = 0; } @@ -433,13 +433,18 @@ GenericValue JIT::runFunction(Function *F, } break; case 1: - if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isIntegerTy(32)) { + if (FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); return rv; } + if (FTy->getParamType(0)->isPointerTy()) { + GenericValue rv; + int (*PF)(char *) = (int(*)(char *))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF((char*)GVTOP(ArgValues[0]))); + return rv; + } break; } } diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 7be6ef8..61bc119 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -461,6 +461,9 @@ namespace { /// allocateCodeSection - Allocate memory for a code section. uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + // Grow the required block size to account for the block header + Size += sizeof(*CurBlock); + // FIXME: Alignement handling. FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 84274c0..99c65ec 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Target/TargetData.h" using namespace llvm; @@ -43,20 +44,40 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - // If the target supports JIT code generation, create the JIT. - if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), GVsWithCode); - - if (ErrorStr) - *ErrorStr = "target does not support JIT code generation"; - return 0; + return new MCJIT(M, TM, new MCJITMemoryManager(JMM), GVsWithCode); } -MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { +MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, + bool AllocateGVsWithCode) + : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), + isCompiled(false), M(m), OS(Buffer) { setTargetData(TM->getTargetData()); +} + +MCJIT::~MCJIT() { + delete MemMgr; + delete TM; +} + +void MCJIT::emitObject(Module *m) { + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + assert(M == m); + + // Get a thread lock to make sure we aren't trying to compile multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. + // Re-compilation is not supported + if (isCompiled) + return; + + PassManager PM; + PM.add(new TargetData(*TM->getTargetData())); // Turn the machine code intermediate representation into bytes in memory @@ -69,23 +90,22 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, // FIXME: When we support multiple modules, we'll want to move the code // gen and finalization out of the constructor here and do it more // on-demand as part of getPointerToFunction(). - PM.run(*M); + PM.run(*m); // Flush the output buffer so the SmallVector gets its data. OS.flush(); // Load the object into the dynamic linker. - MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), + MemoryBuffer* MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(), Buffer.size()), "", false); if (Dyld.loadObject(MB)) report_fatal_error(Dyld.getErrorString()); + // Resolve any relocations. Dyld.resolveRelocations(); -} -MCJIT::~MCJIT() { - delete MemMgr; - delete TM; + // FIXME: Add support for per-module compilation state + isCompiled = true; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -93,6 +113,10 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); @@ -100,6 +124,7 @@ void *MCJIT::getPointerToFunction(Function *F) { return Addr; } + // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. StringRef BaseName = F->getName(); if (BaseName[0] == '\1') @@ -218,6 +243,10 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { + // FIXME: Add support for per-module compilation state + if (!isCompiled) + emitObject(M); + if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); if (ptr) diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 2b3df98..138a7b6 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -23,23 +23,22 @@ namespace llvm { // blah blah. Purely in get-it-up-and-limping mode for now. class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, - RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); + MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr, + bool AllocateGVsWithCode); TargetMachine *TM; MCContext *Ctx; RTDyldMemoryManager *MemMgr; + RuntimeDyld Dyld; - // FIXME: These may need moved to a separate 'jitstate' member like the - // non-MC JIT does for multithreading and such. Just keep them here for now. - PassManager PM; + // FIXME: Add support for multiple modules + bool isCompiled; Module *M; - // FIXME: This really doesn't belong here. + + // FIXME: Move these to a single container which manages JITed objects SmallVector<char, 4096> Buffer; // Working buffer into which we JIT. raw_svector_ostream OS; - RuntimeDyld Dyld; - public: ~MCJIT(); @@ -91,6 +90,14 @@ public: TargetMachine *TM); // @} + +protected: + /// emitObject -- Generate a JITed object in memory from the specified module + /// Currently, MCJIT only supports a single module and the module passed to + /// this function call is expected to be the contained module. The module + /// is passed as a parameter here to prepare for multiple module support in + /// the future. + void emitObject(Module *M); }; } // End llvm namespace diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index b464040..a98ddc0 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -108,7 +108,8 @@ bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) { CommonSymbols[*i] = Size; } else { if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data) { + SymType == object::SymbolRef::ST_Data || + SymType == object::SymbolRef::ST_Unknown) { uint64_t FileOffset; StringRef SectionData; section_iterator si = obj->end_sections(); @@ -333,15 +334,31 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - // TODO: There is only ARM far stub now. We should add the Thumb stub, - // and stubs for branches Thumb - ARM and ARM - Thumb. if (Arch == Triple::arm) { + // TODO: There is only ARM far stub now. We should add the Thumb stub, + // and stubs for branches Thumb - ARM and ARM - Thumb. uint32_t *StubAddr = (uint32_t*)Addr; *StubAddr = 0xe51ff004; // ldr pc,<label> return (uint8_t*)++StubAddr; - } - else + } else if (Arch == Triple::mipsel) { + uint32_t *StubAddr = (uint32_t*)Addr; + // 0: 3c190000 lui t9,%hi(addr). + // 4: 27390000 addiu t9,t9,%lo(addr). + // 8: 03200008 jr t9. + // c: 00000000 nop. + const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000; + const unsigned JrT9Instr = 0x03200008, NopInstr = 0x0; + + *StubAddr = LuiT9Instr; + StubAddr++; + *StubAddr = AdduiT9Instr; + StubAddr++; + *StubAddr = JrT9Instr; + StubAddr++; + *StubAddr = NopInstr; return Addr; + } + return Addr; } // Assign an address to a symbol name and resolve all the relocations diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 39aed34..0aea598 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -55,7 +55,7 @@ public: const MemoryBuffer& getBuffer() const { return *InputData; } - // Methods for type inquiry through isa, cast, and dyn_cast + // Methods for type inquiry through isa, cast and dyn_cast static inline bool classof(const Binary *v) { return (isa<ELFObjectFile<target_endianness, is64Bits> >(v) && classof(cast<ELFObjectFile<target_endianness, is64Bits> >(v))); @@ -208,10 +208,9 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_32: case ELF::R_X86_64_32S: { Value += Addend; - // FIXME: Handle the possibility of this assertion failing - assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) || - (Type == ELF::R_X86_64_32S && - (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL)); + assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) || + (Type == ELF::R_X86_64_32S && + ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress); *Target = TruncatedAddr; @@ -220,7 +219,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress, case ELF::R_X86_64_PC32: { uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress); int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress; - assert(RealOffset <= 214783647 && RealOffset >= -214783648); + assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); *Placeholder = TruncOffset; break; @@ -248,7 +247,7 @@ void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress, } default: // There are other relocation types, but it appears these are the - // only ones currently used by the LLVM ELF object writer + // only ones currently used by the LLVM ELF object writer llvm_unreachable("Relocation type not implemented yet!"); break; } @@ -307,6 +306,44 @@ void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress, } } +void RuntimeDyldELF::resolveMIPSRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend) { + uint32_t* TargetPtr = (uint32_t*)LocalAddress; + Value += Addend; + + DEBUG(dbgs() << "resolveMipselocation, LocalAddress: " << LocalAddress + << " FinalAddress: " << format("%p",FinalAddress) + << " Value: " << format("%x",Value) + << " Type: " << format("%x",Type) + << " Addend: " << format("%x",Addend) + << "\n"); + + switch(Type) { + default: + llvm_unreachable("Not implemented relocation type!"); + break; + case ELF::R_MIPS_32: + *TargetPtr = Value + (*TargetPtr); + break; + case ELF::R_MIPS_26: + *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2); + break; + case ELF::R_MIPS_HI16: + // Get the higher 16-bits. Also add 1 if bit 15 is 1. + Value += ((*TargetPtr) & 0x0000ffff) << 16; + *TargetPtr = ((*TargetPtr) & 0xffff0000) | + (((Value + 0x8000) >> 16) & 0xffff); + break; + case ELF::R_MIPS_LO16: + Value += ((*TargetPtr) & 0x0000ffff); + *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff); + break; + } +} + void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, @@ -327,6 +364,12 @@ void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::mips: // Fall through. + case Triple::mipsel: + resolveMIPSRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL), + (uint32_t)(Value & 0xffffffffL), Type, + (uint32_t)(Addend & 0xffffffffL)); + break; default: llvm_unreachable("Unsupported CPU type!"); } } @@ -424,6 +467,53 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, Section.StubOffset, RelType, 0); Section.StubOffset += getMaxStubSize(); } + } else if (Arch == Triple::mipsel && RelType == ELF::R_MIPS_26) { + // This is an Mips branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); + SectionEntry &Section = Sections[Rel.SectionID]; + uint8_t *Target = Section.Address + Rel.Offset; + uint32_t *TargetAddress = (uint32_t *)Target; + + // Extract the addend from the instruction. + uint32_t Addend = ((*TargetAddress) & 0x03ffffff) << 2; + + Value.Addend += Addend; + + // Look up for existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + + // Creating Hi and Lo relocations for the filled stub instructions. + RelocationEntry REHi(Rel.SectionID, + StubTargetAddr - Section.Address, + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(Rel.SectionID, + StubTargetAddr - Section.Address + 4, + ELF::R_MIPS_LO16, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REHi, Value.SymbolName); + addRelocationForSymbol(RELo, Value.SymbolName); + } else { + addRelocationForSection(REHi, Value.SectionID); + addRelocationForSection(RELo, Value.SectionID); + } + + resolveRelocation(Target, (uint64_t)Target, + (uint64_t)Section.Address + + Section.StubOffset, RelType, 0); + Section.StubOffset += getMaxStubSize(); + } } else { RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); if (Value.SymbolName) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index e413f78..eade49e 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -42,6 +42,12 @@ protected: uint32_t Type, int32_t Addend); + void resolveMIPSRelocation(uint8_t *LocalAddress, + uint32_t FinalAddress, + uint32_t Value, + uint32_t Type, + int32_t Addend); + virtual void resolveRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, uint64_t Value, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index c38ca69..3d89994 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -161,6 +161,8 @@ protected: inline unsigned getMaxStubSize() { if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address + else if (Arch == Triple::mipsel) + return 16; else return 0; } diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index afba2e8..a6599bf 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -16,6 +16,7 @@ #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" @@ -595,13 +596,13 @@ void ModuleLinker::computeTypeMapping() { // At this point, the destination module may have a type "%foo = { i32 }" for // example. When the source module got loaded into the same LLVMContext, if // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - std::vector<StructType*> SrcStructTypes; - SrcM->findUsedStructTypes(SrcStructTypes, true); + TypeFinder SrcStructTypes; + SrcStructTypes.run(*SrcM, true); SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(), SrcStructTypes.end()); - std::vector<StructType*> DstStructTypes; - DstM->findUsedStructTypes(DstStructTypes, true); + TypeFinder DstStructTypes; + DstStructTypes.run(*DstM, true); SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(), DstStructTypes.end()); diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index f11e686..99bff96 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMMC MCObjectStreamer.cpp MCObjectWriter.cpp MCPureStreamer.cpp + MCRegisterInfo.cpp MCSection.cpp MCSectionCOFF.cpp MCSectionELF.cpp diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 0aa0c98..b7d2c28 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/LEB128.h" using namespace llvm; @@ -719,9 +720,9 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { Data.clear(); raw_svector_ostream OSE(Data); if (LF.isSigned()) - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); else - MCObjectWriter::EncodeULEB128(Value, OSE); + encodeULEB128(Value, OSE); OSE.flush(); return OldSize != LF.getContents().size(); } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 75eaf80..4c63e43 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/ADT/Hashing.h" @@ -361,7 +362,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, OS << char(dwarf::DW_LNS_const_add_pc); else { OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); } OS << char(dwarf::DW_LNS_extended_op); OS << char(1); @@ -376,7 +377,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // it with DW_LNS_advance_line. if (Temp >= DWARF2_LINE_RANGE) { OS << char(dwarf::DW_LNS_advance_line); - MCObjectWriter::EncodeSLEB128(LineDelta, OS); + encodeSLEB128(LineDelta, OS); LineDelta = 0; Temp = 0 - DWARF2_LINE_BASE; @@ -412,7 +413,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // Otherwise use DW_LNS_advance_pc. OS << char(dwarf::DW_LNS_advance_pc); - MCObjectWriter::EncodeULEB128(AddrDelta, OS); + encodeULEB128(AddrDelta, OS); if (NeedCopy) OS << char(dwarf::DW_LNS_copy); @@ -1293,20 +1294,17 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, streamer.EmitSymbolValue(&cieStart, 4); } - unsigned fdeEncoding = MOFI->getFDEEncoding(UsingCFI); - unsigned size = getSizeForEncoding(streamer, fdeEncoding); - // PC Begin - unsigned PCBeginEncoding = IsEH ? fdeEncoding : - (unsigned)dwarf::DW_EH_PE_absptr; - unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding); - EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location"); + unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI) + : (unsigned)dwarf::DW_EH_PE_absptr; + unsigned PCSize = getSizeForEncoding(streamer, PCEncoding); + EmitSymbol(streamer, *frame.Begin, PCEncoding, "FDE initial location"); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); if (verboseAsm) streamer.AddComment("FDE address range"); - streamer.EmitAbsValue(Range, size); + streamer.EmitAbsValue(Range, PCSize); if (IsEH) { // Augmentation Data Length @@ -1329,7 +1327,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding - streamer.EmitValueToAlignment(PCBeginSize); + streamer.EmitValueToAlignment(PCSize); return fdeEnd; } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 4e6a1b9..29b4a94 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -507,15 +507,13 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { PDataSection = Ctx->getCOFFSection(".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); XDataSection = Ctx->getCOFFSection(".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); TLSDataSection = Ctx->getCOFFSection(".tls$", diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 030f247..94d7cd6 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -17,40 +17,6 @@ using namespace llvm; MCObjectWriter::~MCObjectWriter() { } -/// Utility function to encode a SLEB128 value. -void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) { - bool More; - do { - uint8_t Byte = Value & 0x7f; - // NOTE: this assumes that this signed shift is an arithmetic right shift. - Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); - if (More) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (More); -} - -/// Utility function to encode a ULEB128 value. -void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS, - unsigned Padding) { - do { - uint8_t Byte = Value & 0x7f; - Value >>= 7; - if (Value != 0 || Padding != 0) - Byte |= 0x80; // Mark this byte that that more bytes will follow. - OS << char(Byte); - } while (Value != 0); - - // Pad with 0x80 and emit a null byte at the end. - if (Padding != 0) { - for (; Padding != 1; --Padding) - OS << '\x80'; - OS << '\x00'; - } -} - bool MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, const MCSymbolRefExpr *A, diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 2daad0a..240c10b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -46,14 +46,17 @@ namespace { /// \brief Helper class for tracking macro definitions. typedef std::vector<AsmToken> MacroArgument; +typedef std::vector<MacroArgument> MacroArguments; +typedef StringRef MacroParameter; +typedef std::vector<MacroParameter> MacroParameters; struct Macro { StringRef Name; StringRef Body; - std::vector<StringRef> Parameters; + MacroParameters Parameters; public: - Macro(StringRef N, StringRef B, const std::vector<StringRef> &P) : + Macro(StringRef N, StringRef B, const MacroParameters &P) : Name(N), Body(B), Parameters(P) {} }; @@ -181,8 +184,8 @@ private: bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M); bool expandMacro(raw_svector_ostream &OS, StringRef Body, - const std::vector<StringRef> &Parameters, - const std::vector<MacroArgument> &A, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L); void HandleMacroExit(); @@ -207,7 +210,7 @@ private: void EatToEndOfStatement(); bool ParseMacroArgument(MacroArgument &MA); - bool ParseMacroArguments(const Macro *M, std::vector<MacroArgument> &A); + bool ParseMacroArguments(const Macro *M, MacroArguments &A); /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit @@ -1451,9 +1454,17 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { NewDiag.print(0, OS); } +// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The +// difference being that that function accepts '@' as part of identifiers and +// we can't do that. AsmLexer.cpp should probably be changed to handle +// '@' as a special case when needed. +static bool isIdentifierChar(char c) { + return isalnum(c) || c == '_' || c == '$' || c == '.'; +} + bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, - const std::vector<StringRef> &Parameters, - const std::vector<MacroArgument> &A, + const MacroParameters &Parameters, + const MacroArguments &A, const SMLoc &L) { unsigned NParameters = Parameters.size(); if (NParameters != 0 && NParameters != A.size()) @@ -1515,7 +1526,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, Pos += 2; } else { unsigned I = Pos + 1; - while (isalnum(Body[I]) && I + 1 != End) + while (isIdentifierChar(Body[I]) && I + 1 != End) ++I; const char *Begin = Body.data() + Pos +1; @@ -1555,8 +1566,6 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) { unsigned ParenLevel = 0; for (;;) { - SMLoc LastTokenLoc; - if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal)) return TokError("unexpected token in macro instantiation"); @@ -1578,13 +1587,12 @@ bool AsmParser::ParseMacroArgument(MacroArgument &MA) { Lex(); } if (ParenLevel != 0) - return TokError("unbalanced parenthesises in macro argument"); + return TokError("unbalanced parentheses in macro argument"); return false; } // Parse the macro instantiation arguments. -bool AsmParser::ParseMacroArguments(const Macro *M, - std::vector<MacroArgument> &A) { +bool AsmParser::ParseMacroArguments(const Macro *M, MacroArguments &A) { const unsigned NParameters = M ? M->Parameters.size() : 0; // Parse two kinds of macro invocations: @@ -1597,8 +1605,8 @@ bool AsmParser::ParseMacroArguments(const Macro *M, if (ParseMacroArgument(MA)) return true; - if (!MA.empty()) - A.push_back(MA); + A.push_back(MA); + if (Lexer.is(AsmToken::EndOfStatement)) return false; @@ -1615,17 +1623,23 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc, if (ActiveMacros.size() == 20) return TokError("macros cannot be nested more than 20 levels deep"); - std::vector<MacroArgument> MacroArguments; - if (ParseMacroArguments(M, MacroArguments)) + MacroArguments A; + if (ParseMacroArguments(M, A)) return true; + // Remove any trailing empty arguments. Do this after-the-fact as we have + // to keep empty arguments in the middle of the list or positionality + // gets off. e.g., "foo 1, , 2" vs. "foo 1, 2," + while (!A.empty() && A.back().empty()) + A.pop_back(); + // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. SmallString<256> Buf; StringRef Body = M->Body; raw_svector_ostream OS(Buf); - if (expandMacro(OS, Body, M->Parameters, MacroArguments, getTok().getLoc())) + if (expandMacro(OS, Body, M->Parameters, A, getTok().getLoc())) return true; // We include the .endmacro in the buffer as our queue to exit the macro @@ -3065,14 +3079,14 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, SMLoc DirectiveLoc) { StringRef Name; if (getParser().ParseIdentifier(Name)) - return TokError("expected identifier in directive"); + return TokError("expected identifier in '.macro' directive"); - std::vector<StringRef> Parameters; + MacroParameters Parameters; if (getLexer().isNot(AsmToken::EndOfStatement)) { - for(;;) { - StringRef Parameter; + for (;;) { + MacroParameter Parameter; if (getParser().ParseIdentifier(Parameter)) - return TokError("expected identifier in directive"); + return TokError("expected identifier in '.macro' directive"); Parameters.push_back(Parameter); if (getLexer().isNot(AsmToken::Comma)) @@ -3126,7 +3140,7 @@ bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive, /// ::= .endm /// ::= .endmacro bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive, - SMLoc DirectiveLoc) { + SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '" + Directive + "' directive"); @@ -3224,7 +3238,7 @@ Macro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { // We Are Anonymous. StringRef Name; - std::vector<StringRef> Parameters; + MacroParameters Parameters; return new Macro(Name, Body, Parameters); } @@ -3270,8 +3284,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { // Macro instantiation is lexical, unfortunately. We construct a new buffer // to hold the macro body with substitutions. SmallString<256> Buf; - std::vector<StringRef> Parameters; - const std::vector<MacroArgument> A; + MacroParameters Parameters; + MacroArguments A; raw_svector_ostream OS(Buf); while (Count--) { if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc())) @@ -3285,8 +3299,8 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { /// ParseDirectiveIrp /// ::= .irp symbol,values bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { - std::vector<StringRef> Parameters; - StringRef Parameter; + MacroParameters Parameters; + MacroParameter Parameter; if (ParseIdentifier(Parameter)) return TokError("expected identifier in '.irp' directive"); @@ -3298,7 +3312,7 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); - std::vector<MacroArgument> A; + MacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3315,9 +3329,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { SmallString<256> Buf; raw_svector_ostream OS(Buf); - for (std::vector<MacroArgument>::iterator i = A.begin(), e = A.end(); i != e; - ++i) { - std::vector<MacroArgument> Args; + for (MacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { + MacroArguments Args; Args.push_back(*i); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) @@ -3332,8 +3345,8 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { /// ParseDirectiveIrpc /// ::= .irpc symbol,values bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { - std::vector<StringRef> Parameters; - StringRef Parameter; + MacroParameters Parameters; + MacroParameter Parameter; if (ParseIdentifier(Parameter)) return TokError("expected identifier in '.irpc' directive"); @@ -3345,7 +3358,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); - std::vector<MacroArgument> A; + MacroArguments A; if (ParseMacroArguments(0, A)) return true; @@ -3371,7 +3384,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { MacroArgument Arg; Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); - std::vector<MacroArgument> Args; + MacroArguments Args; Args.push_back(Arg); if (expandMacro(OS, M->Body, Parameters, Args, getTok().getLoc())) diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 5662fea..18033d0 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -50,6 +50,9 @@ public: AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePushSection>(".pushsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePopSection>(".popsection"); + AddDirectiveHandler<&DarwinAsmParser::ParseDirectivePrevious>(".previous"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>( ".secure_log_unique"); AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>( @@ -112,6 +115,9 @@ public: bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); bool ParseDirectiveLsym(StringRef, SMLoc); bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectivePushSection(StringRef, SMLoc); + bool ParseDirectivePopSection(StringRef, SMLoc); + bool ParseDirectivePrevious(StringRef, SMLoc); bool ParseDirectiveSecureLogReset(StringRef, SMLoc); bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); @@ -297,7 +303,7 @@ public: }; -} +} // end anonymous namespace bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, const char *Section, @@ -457,6 +463,37 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return false; } +/// ParseDirectivePushSection: +/// ::= .pushsection identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectivePushSection(StringRef S, SMLoc Loc) { + getStreamer().PushSection(); + + if (ParseDirectiveSection(S, Loc)) { + getStreamer().PopSection(); + return true; + } + + return false; +} + +/// ParseDirectivePopSection: +/// ::= .popsection +bool DarwinAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { + if (!getStreamer().PopSection()) + return TokError(".popsection without corresponding .pushsection"); + return false; +} + +/// ParseDirectivePrevious: +/// ::= .previous +bool DarwinAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection == NULL) + return TokError(".previous without corresponding .section"); + getStreamer().SwitchSection(PreviousSection); + return false; +} + /// ParseDirectiveSecureLogUnique /// ::= .secure_log_unique ... message ... bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { @@ -707,4 +744,4 @@ MCAsmParserExtension *createDarwinAsmParser() { return new DarwinAsmParser; } -} +} // end llvm namespace diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp new file mode 100644 index 0000000..4d1aff3 --- /dev/null +++ b/lib/MC/MCRegisterInfo.cpp @@ -0,0 +1,71 @@ +//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements MCRegisterInfo functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCRegisterInfo.h" + +using namespace llvm; + +unsigned MCRegisterInfo::getMatchingSuperReg(unsigned Reg, unsigned SubIdx, + const MCRegisterClass *RC) const { + for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers) + if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx)) + return *Supers; + return 0; +} + +unsigned MCRegisterInfo::getSubReg(unsigned Reg, unsigned Idx) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*SRI == Idx) + return *Subs; + return 0; +} + +unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { + // Get a pointer to the corresponding SubRegIndices list. This list has the + // name of each sub-register in the same order as MCSubRegIterator. + const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices; + for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI) + if (*Subs == SubReg) + return *SRI; + return 0; +} + +int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; + unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + if (I == M+Size || I->FromReg != RegNum) + return -1; + return I->ToReg; +} + +int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const { + const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs; + unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize; + + DwarfLLVMRegPair Key = { RegNum, 0 }; + const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key); + assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum"); + return I->ToReg; +} + +int MCRegisterInfo::getSEHRegNum(unsigned RegNum) const { + const DenseMap<unsigned, int>::const_iterator I = L2SEHRegs.find(RegNum); + if (I == L2SEHRegs.end()) return (int)RegNum; + return I->second; +} diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index e363f28..0bac24d 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/LEB128.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include <cstdlib> @@ -94,7 +95,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, unsigned Padding) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(Value, OSE, Padding); + encodeULEB128(Value, OSE, Padding); EmitBytes(OSE.str(), AddrSpace); } @@ -103,7 +104,7 @@ void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeSLEB128(Value, OSE); + encodeSLEB128(Value, OSE); EmitBytes(OSE.str(), AddrSpace); } diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 79e66fc..c05b4b1 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -228,8 +228,7 @@ static const MCSection *getWin64EHTableSection(StringRef suffix, return context.getCOFFSection((".xdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } @@ -239,8 +238,7 @@ static const MCSection *getWin64EHFuncTableSection(StringRef suffix, return context.getObjectFileInfo()->getPDataSection(); return context.getCOFFSection((".pdata"+suffix).str(), COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ | - COFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 409d4fb..ed261a4 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1765,6 +1765,50 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, return fs; } +/* Rounding-mode corrrect round to integral value. */ +APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { + opStatus fs; + assertArithmeticOK(*semantics); + + // If the exponent is large enough, we know that this value is already + // integral, and the arithmetic below would potentially cause it to saturate + // to +/-Inf. Bail out early instead. + if (exponent+1 >= (int)semanticsPrecision(*semantics)) + return opOK; + + // The algorithm here is quite simple: we add 2^(p-1), where p is the + // precision of our format, and then subtract it back off again. The choice + // of rounding modes for the addition/subtraction determines the rounding mode + // for our integral rounding as well. + // NOTE: When the input value is negative, we do subtraction followed by + // addition instead. + APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1); + IntegerConstant <<= semanticsPrecision(*semantics)-1; + APFloat MagicConstant(*semantics); + fs = MagicConstant.convertFromAPInt(IntegerConstant, false, + rmNearestTiesToEven); + MagicConstant.copySign(*this); + + if (fs != opOK) + return fs; + + // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly. + bool inputSign = isNegative(); + + fs = add(MagicConstant, rounding_mode); + if (fs != opOK && fs != opInexact) + return fs; + + fs = subtract(MagicConstant, rounding_mode); + + // Restore the input sign. + if (inputSign != isNegative()) + changeSign(); + + return fs; +} + + /* Comparison requires normalized numbers. */ APFloat::cmpResult APFloat::compare(const APFloat &rhs) const @@ -3278,16 +3322,8 @@ APFloat::APFloat(double d) : exponent2(0), sign2(0) { } namespace { - static void append(SmallVectorImpl<char> &Buffer, - unsigned N, const char *Str) { - unsigned Start = Buffer.size(); - Buffer.set_size(Start + N); - memcpy(&Buffer[Start], Str, N); - } - - template <unsigned N> - void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) { - append(Buffer, N, Str); + void append(SmallVectorImpl<char> &Buffer, StringRef Str) { + Buffer.append(Str.begin(), Str.end()); } /// Removes data from the given significand until it is no more diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 9103327..83baf60 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMSupport Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp + FileOutputBuffer.cpp FoldingSet.cpp FormattedStream.cpp GraphWriter.cpp diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index dc21155..3d5cce0 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -139,7 +139,7 @@ uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { while (isValidOffset(offset)) { byte = Data[offset++]; - result |= (byte & 0x7f) << shift; + result |= uint64_t(byte & 0x7f) << shift; shift += 7; if ((byte & 0x80) == 0) break; @@ -160,7 +160,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { while (isValidOffset(offset)) { byte = Data[offset++]; - result |= (byte & 0x7f) << shift; + result |= uint64_t(byte & 0x7f) << shift; shift += 7; if ((byte & 0x80) == 0) break; @@ -168,7 +168,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { // Sign bit of byte is 2nd high order bit (0x40) if (shift < 64 && (byte & 0x40)) - result |= -(1 << shift); + result |= -(1ULL << shift); *offset_ptr = offset; return result; diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index 9fdb12e..c8e8900 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements a handle way of adding debugging information to your +// This file implements a handy way of adding debugging information to your // code, without it being enabled all of the time, and without having to add // command line options to enable it. // @@ -18,8 +18,8 @@ // can specify '-debug-only=foo' to enable JUST the debug information for the // foo class. // -// When compiling in release mode, the -debug-* options and all code in DEBUG() -// statements disappears, so it does not effect the runtime of the code. +// When compiling without assertions, the -debug-* options and all code in +// DEBUG() statements disappears, so it does not affect the runtime of the code. // //===----------------------------------------------------------------------===// @@ -89,11 +89,11 @@ bool llvm::isCurrentDebugType(const char *DebugType) { return CurrentDebugType.empty() || DebugType == CurrentDebugType; } -/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X +/// setCurrentDebugType - Set the current debug type, as if the -debug-only=X /// option were specified. Note that DebugFlag also needs to be set to true for /// debug output to be produced. /// -void llvm::SetCurrentDebugType(const char *Type) { +void llvm::setCurrentDebugType(const char *Type) { CurrentDebugType = Type; } diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp new file mode 100644 index 0000000..7dc9587 --- /dev/null +++ b/lib/Support/FileOutputBuffer.cpp @@ -0,0 +1,148 @@ +//===- FileOutputBuffer.cpp - File Output Buffer ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Utility for creating a in-memory buffer that will be written to a file. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/FileOutputBuffer.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" + + +namespace llvm { + + +FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End, + StringRef Path, StringRef TmpPath) + : BufferStart(Start), BufferEnd(End) { + FinalPath.assign(Path); + TempPath.assign(TmpPath); +} + + +FileOutputBuffer::~FileOutputBuffer() { + // If not already commited, delete buffer and remove temp file. + if ( BufferStart != NULL ) { + sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize()); + bool Existed; + sys::fs::remove(Twine(TempPath), Existed); + } +} + + +error_code FileOutputBuffer::create(StringRef FilePath, + size_t Size, + OwningPtr<FileOutputBuffer> &Result, + unsigned Flags) { + // If file already exists, it must be a regular file (to be mappable). + sys::fs::file_status Stat; + error_code EC = sys::fs::status(FilePath, Stat); + switch (Stat.type()) { + case sys::fs::file_type::file_not_found: + // If file does not exist, we'll create one. + break; + case sys::fs::file_type::regular_file: { + // If file is not currently writable, error out. + // FIXME: There is no sys::fs:: api for checking this. + // FIXME: In posix, you use the access() call to check this. + } + break; + default: + if (EC) + return EC; + else + return make_error_code(errc::operation_not_permitted); + } + + // Delete target file. + bool Existed; + EC = sys::fs::remove(FilePath, Existed); + if (EC) + return EC; + + // Create new file in same directory but with random name. + SmallString<128> TempFilePath; + int FD; + EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%", + FD, TempFilePath, false, 0644); + if (EC) + return EC; + + // The unique_file() interface leaks lower layers and returns a file + // descriptor. There is no way to directly close it, so use this hack + // to hand it off to raw_fd_ostream to close for us. + { + raw_fd_ostream Dummy(FD, /*shouldClose=*/true); + } + + // Resize file to requested initial size + EC = sys::fs::resize_file(Twine(TempFilePath), Size); + if (EC) + return EC; + + // If requested, make the output file executable. + if ( Flags & F_executable ) { + sys::fs::file_status Stat2; + EC = sys::fs::status(Twine(TempFilePath), Stat2); + if (EC) + return EC; + + sys::fs::perms new_perms = Stat2.permissions(); + if ( new_perms & sys::fs::owner_read ) + new_perms |= sys::fs::owner_exe; + if ( new_perms & sys::fs::group_read ) + new_perms |= sys::fs::group_exe; + if ( new_perms & sys::fs::others_read ) + new_perms |= sys::fs::others_exe; + new_perms |= sys::fs::add_perms; + EC = sys::fs::permissions(Twine(TempFilePath), new_perms); + if (EC) + return EC; + } + + // Memory map new file. + void *Base; + EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base); + if (EC) + return EC; + + // Create FileOutputBuffer object to own mapped range. + uint8_t *Start = reinterpret_cast<uint8_t*>(Base); + Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath)); + + return error_code::success(); +} + + +error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { + // Unmap buffer, letting OS flush dirty pages to file on disk. + void *Start = reinterpret_cast<void*>(BufferStart); + error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize()); + if (EC) + return EC; + + // If requested, resize file as part of commit. + if ( NewSmallerSize != -1 ) { + EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize); + if (EC) + return EC; + } + + // Rename file to final name. + return sys::fs::rename(Twine(TempPath), Twine(FinalPath)); +} + + +} // namespace + diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index da5baab..4e4a026 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -59,7 +59,8 @@ MutexImpl::MutexImpl( bool recursive) errorcode = pthread_mutexattr_settype(&attr, kind); assert(errorcode == 0); -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ + !defined(__DragonFly__) && !defined(__Bitrig__) // Make it a process local mutex errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); assert(errorcode == 0); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 7b26ea9..cca549d 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -124,6 +124,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case RTEMS: return "rtems"; case NativeClient: return "nacl"; case CNK: return "cnk"; + case Bitrig: return "bitrig"; } llvm_unreachable("Invalid OSType"); @@ -293,6 +294,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("rtems", Triple::RTEMS) .StartsWith("nacl", Triple::NativeClient) .StartsWith("cnk", Triple::CNK) + .StartsWith("bitrig", Triple::Bitrig) .Default(Triple::UnknownOS); } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index b41390a..6bddbdf 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -260,7 +260,7 @@ Path::GetCurrentDirectory() { return Path(pathname); } -#if defined(__FreeBSD__) || defined (__NetBSD__) || \ +#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], @@ -329,7 +329,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { if (realpath(exe_path, link_path)) return Path(link_path); } -#elif defined(__FreeBSD__) || defined (__NetBSD__) || \ +#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 93ccd1a..f59551e 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -50,6 +50,12 @@ #include <limits.h> #endif +// Both stdio.h and cstdio are included via different pathes and +// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros +// either. +#undef ferror +#undef feof + // For GNU Hurd #if defined(__GNU__) && !defined(PATH_MAX) # define PATH_MAX 4096 @@ -461,6 +467,118 @@ rety_open_create: return error_code::success(); } +error_code mapped_file_region::init(int fd, uint64_t offset) { + AutoFD FD(fd); + + // Figure out how large the file is. + struct stat FileInfo; + if (fstat(fd, &FileInfo) == -1) + return error_code(errno, system_category()); + uint64_t FileSize = FileInfo.st_size; + + if (Size == 0) + Size = FileSize; + else if (FileSize < Size) { + // We need to grow the file. + if (ftruncate(fd, Size) == -1) + return error_code(errno, system_category()); + } + + int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE; + int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE); +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + Mapping = ::mmap(0, Size, prot, flags, fd, offset); + if (Mapping == MAP_FAILED) + return error_code(errno, system_category()); + return error_code::success(); +} + +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T. + if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } + + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = (mode == readonly) ? O_RDONLY : O_RDWR; + int ofd = ::open(name.begin(), oflags); + if (ofd == -1) { + ec = error_code(errno, system_category()); + return; + } + + ec = init(ofd, offset); + if (ec) + Mapping = 0; +} + +mapped_file_region::mapped_file_region(int fd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T. + if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } + + ec = init(fd, offset); + if (ec) + Mapping = 0; +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::munmap(Mapping, Size); +} + +#if LLVM_USE_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) { + other.Mapping = 0; +} +#endif + +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; +} + +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mapping && "Mapping failed but used anyway!"); + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + return Process::GetPageSize(); +} + error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallString<128> path_null(path); diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 174112e..5204147 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -20,9 +20,10 @@ #ifdef HAVE_SYS_RESOURCE_H #include <sys/resource.h> #endif -// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead, -// Unix.h includes this for us already. -#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) +// DragonFlyBSD, OpenBSD, and Bitrig have deprecated <malloc.h> for +// <stdlib.h> instead. Unix.h includes this for us already. +#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__) && \ + !defined(__OpenBSD__) && !defined(__Bitrig__) #include <malloc.h> #endif #ifdef HAVE_MALLOC_MALLOC_H diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 66eeab0..696768b 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -22,6 +22,8 @@ #include <sys/stat.h> #include <sys/types.h> +#undef max + // MinGW doesn't define this. #ifndef _ERRNO_T_DEFINED #define _ERRNO_T_DEFINED @@ -703,6 +705,203 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } +error_code mapped_file_region::init(int FD, uint64_t Offset) { + FileDescriptor = FD; + // Make sure that the requested size fits within SIZE_T. + if (Size > std::numeric_limits<SIZE_T>::max()) { + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return make_error_code(errc::invalid_argument); + } + + DWORD flprotect; + switch (Mode) { + case readonly: flprotect = PAGE_READONLY; break; + case readwrite: flprotect = PAGE_READWRITE; break; + case priv: flprotect = PAGE_WRITECOPY; break; + default: llvm_unreachable("invalid mapping mode"); + } + + FileMappingHandle = ::CreateFileMapping(FileHandle, + 0, + flprotect, + Size >> 32, + Size & 0xffffffff, + 0); + if (FileMappingHandle == NULL) { + error_code ec = windows_error(GetLastError()); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + + DWORD dwDesiredAccess; + switch (Mode) { + case readonly: dwDesiredAccess = FILE_MAP_READ; break; + case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break; + case priv: dwDesiredAccess = FILE_MAP_COPY; break; + default: llvm_unreachable("invalid mapping mode"); + } + Mapping = ::MapViewOfFile(FileMappingHandle, + dwDesiredAccess, + Offset >> 32, + Offset & 0xffffffff, + Size); + if (Mapping == NULL) { + error_code ec = windows_error(GetLastError()); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + + if (Size == 0) { + MEMORY_BASIC_INFORMATION mbi; + SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); + if (Result == 0) { + error_code ec = windows_error(GetLastError()); + ::UnmapViewOfFile(Mapping); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else + ::CloseHandle(FileHandle); + return ec; + } + Size = mbi.RegionSize; + } + return error_code::success(); +} + +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor() + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + SmallString<128> path_storage; + SmallVector<wchar_t, 128> path_utf16; + + // Convert path to UTF-16. + if (ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) + return; + + // Get file handle for creating a file mapping. + FileHandle = ::CreateFileW(c_str(path_utf16), + Mode == readonly ? GENERIC_READ + : GENERIC_READ | GENERIC_WRITE, + Mode == readonly ? FILE_SHARE_READ + : 0, + 0, + Mode == readonly ? OPEN_EXISTING + : OPEN_ALWAYS, + Mode == readonly ? FILE_ATTRIBUTE_READONLY + : FILE_ATTRIBUTE_NORMAL, + 0); + if (FileHandle == INVALID_HANDLE_VALUE) { + ec = windows_error(::GetLastError()); + return; + } + + FileDescriptor = 0; + ec = init(FileDescriptor, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; + } +} + +mapped_file_region::mapped_file_region(int fd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor(fd) + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); + if (FileHandle == INVALID_HANDLE_VALUE) { + _close(FileDescriptor); + FileDescriptor = 0; + ec = make_error_code(errc::bad_file_descriptor); + return; + } + + ec = init(FileDescriptor, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; + } +} + +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::UnmapViewOfFile(Mapping); + if (FileMappingHandle) + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) + _close(FileDescriptor); + else if (FileHandle != INVALID_HANDLE_VALUE) + ::CloseHandle(FileHandle); +} + +#if LLVM_USE_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode) + , Size(other.Size) + , Mapping(other.Mapping) + , FileDescriptor(other.FileDescriptor) + , FileHandle(other.FileHandle) + , FileMappingHandle(other.FileMappingHandle) { + other.Mapping = other.FileMappingHandle = 0; + other.FileHandle = INVALID_HANDLE_VALUE; + other.FileDescriptor = 0; +} +#endif + +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; +} + +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} + +char *mapped_file_region::data() const { + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<char*>(Mapping); +} + +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} + +int mapped_file_region::alignment() { + SYSTEM_INFO SysInfo; + ::GetSystemInfo(&SysInfo); + return SysInfo.dwAllocationGranularity; +} + error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallVector<wchar_t, 128> path_utf16; diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 9424677..b9c7ff6 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -2284,23 +2284,33 @@ InstantiateMulticlassDef(MultiClass &MC, Ref.Rec = DefProto; AddSubClass(CurRec, Ref); - if (DefNameString == 0) { - // We must resolve references to NAME. - if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(), - DefmPrefix)) { - Error(DefmPrefixLoc, "Could not resolve " - + CurRec->getNameInitAsString() + ":NAME to '" - + DefmPrefix->getAsUnquotedString() + "'"); - return 0; - } + // Set the value for NAME. We don't resolve references to it 'til later, + // though, so that uses in nested multiclass names don't get + // confused. + if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(), + DefmPrefix)) { + Error(DefmPrefixLoc, "Could not resolve " + + CurRec->getNameInitAsString() + ":NAME to '" + + DefmPrefix->getAsUnquotedString() + "'"); + return 0; + } + // If the DefNameString didn't resolve, we probably have a reference to + // NAME and need to replace it. We need to do at least this much greedily, + // otherwise nested multiclasses will end up with incorrect NAME expansions. + if (DefNameString == 0) { RecordVal *DefNameRV = CurRec->getValue("NAME"); CurRec->resolveReferencesTo(DefNameRV); } if (!CurMultiClass) { - // We do this after resolving NAME because before resolution, many - // multiclass defs will have the same name expression. If we are + // Now that we're at the top level, resolve all NAME references + // in the resultant defs that weren't in the def names themselves. + RecordVal *DefNameRV = CurRec->getValue("NAME"); + CurRec->resolveReferencesTo(DefNameRV); + + // Now that NAME references are resolved and we're at the top level of + // any multiclass expansions, add the record to the RecordKeeper. If we are // currently in a multiclass, it means this defm appears inside a // multiclass and its name won't be fully resolvable until we see // the top-level defm. Therefore, we don't add this to the diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index cd3c0e0..69e2346 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -224,7 +224,7 @@ def : ProcNoItin<"cortex-m3", [HasV7Ops, def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureVFP2, + FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureMClass]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 9a1ce06..e9e2803 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -529,10 +529,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, return false; } - // These modifiers are not yet supported. + // This modifier is not yet supported. case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1. - case 'H': // The highest-numbered register of a pair. return true; + case 'H': { // The highest-numbered register of a pair. + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterClass &RC = ARM::GPRRegClass; + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + unsigned RegIdx = TRI->getEncodingValue(MO.getReg()); + RegIdx |= 1; //The odd register is also the higher-numbered one of a pair. + + unsigned Reg = RC.getRegister(RegIdx); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } } } @@ -1136,8 +1150,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(SrcReg == ARM::SP && "Only stack pointer as a source reg is supported"); for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; - i != NumOps; ++i) - RegList.push_back(MI->getOperand(i).getReg()); + i != NumOps; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Actually, there should never be any impdef stuff here. Skip it + // temporary to workaround PR11902. + if (MO.isImplicit()) + continue; + RegList.push_back(MO.getReg()); + } break; case ARM::STR_PRE_IMM: case ARM::STR_PRE_REG: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 714238a..29033e5 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -795,8 +795,28 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + // Use aligned spills if the stack can be realigned. + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + } + } else + llvm_unreachable("Unknown reg class!"); + break; case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. @@ -868,6 +888,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, } break; case ARM::VST1q64: + case ARM::VST1d64TPseudo: + case ARM::VST1d64QPseudo: if (MI->getOperand(0).isFI() && MI->getOperand(2).getSubReg() == 0) { FrameIndex = MI->getOperand(0).getIndex(); @@ -942,8 +964,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else llvm_unreachable("Unknown reg class!"); break; - case 32: - if (ARM::QQPRRegClass.hasSubClassEq(RC)) { + case 24: + if (ARM::DTripleRegClass.hasSubClassEq(RC)) { + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI) + .addMemOperand(MMO)); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI); + if (TargetRegisterInfo::isPhysicalRegister(DestReg)) + MIB.addReg(DestReg, RegState::ImplicitDefine); + } + } else + llvm_unreachable("Unknown reg class!"); + break; + case 32: + if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) { if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) .addFrameIndex(FI).addImm(16) @@ -1016,6 +1058,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, } break; case ARM::VLD1q64: + case ARM::VLD1d64TPseudo: + case ARM::VLD1d64QPseudo: if (MI->getOperand(1).isFI() && MI->getOperand(0).getSubReg() == 0) { FrameIndex = MI->getOperand(1).getIndex(); @@ -1524,6 +1568,139 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } +/// Identify instructions that can be folded into a MOVCC instruction, and +/// return the corresponding opcode for the predicated pseudo-instruction. +static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, + const MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return 0; + if (!MRI.hasOneNonDBGUse(Reg)) + return 0; + MI = MRI.getVRegDef(Reg); + if (!MI) + return 0; + // Check if MI has any non-dead defs or physreg uses. This also detects + // predicated instructions which will be reading CPSR. + for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Reject frame index operands, PEI can't handle the predicated pseudos. + if (MO.isFI() || MO.isCPI() || MO.isJTI()) + return 0; + if (!MO.isReg()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + return 0; + if (MO.isDef() && !MO.isDead()) + return 0; + } + switch (MI->getOpcode()) { + default: return 0; + case ARM::ANDri: return ARM::ANDCCri; + case ARM::ANDrr: return ARM::ANDCCrr; + case ARM::ANDrsi: return ARM::ANDCCrsi; + case ARM::ANDrsr: return ARM::ANDCCrsr; + case ARM::t2ANDri: return ARM::t2ANDCCri; + case ARM::t2ANDrr: return ARM::t2ANDCCrr; + case ARM::t2ANDrs: return ARM::t2ANDCCrs; + case ARM::EORri: return ARM::EORCCri; + case ARM::EORrr: return ARM::EORCCrr; + case ARM::EORrsi: return ARM::EORCCrsi; + case ARM::EORrsr: return ARM::EORCCrsr; + case ARM::t2EORri: return ARM::t2EORCCri; + case ARM::t2EORrr: return ARM::t2EORCCrr; + case ARM::t2EORrs: return ARM::t2EORCCrs; + case ARM::ORRri: return ARM::ORRCCri; + case ARM::ORRrr: return ARM::ORRCCrr; + case ARM::ORRrsi: return ARM::ORRCCrsi; + case ARM::ORRrsr: return ARM::ORRCCrsr; + case ARM::t2ORRri: return ARM::t2ORRCCri; + case ARM::t2ORRrr: return ARM::t2ORRCCrr; + case ARM::t2ORRrs: return ARM::t2ORRCCrs; + + // ARM ADD/SUB + case ARM::ADDri: return ARM::ADDCCri; + case ARM::ADDrr: return ARM::ADDCCrr; + case ARM::ADDrsi: return ARM::ADDCCrsi; + case ARM::ADDrsr: return ARM::ADDCCrsr; + case ARM::SUBri: return ARM::SUBCCri; + case ARM::SUBrr: return ARM::SUBCCrr; + case ARM::SUBrsi: return ARM::SUBCCrsi; + case ARM::SUBrsr: return ARM::SUBCCrsr; + + // Thumb2 ADD/SUB + case ARM::t2ADDri: return ARM::t2ADDCCri; + case ARM::t2ADDri12: return ARM::t2ADDCCri12; + case ARM::t2ADDrr: return ARM::t2ADDCCrr; + case ARM::t2ADDrs: return ARM::t2ADDCCrs; + case ARM::t2SUBri: return ARM::t2SUBCCri; + case ARM::t2SUBri12: return ARM::t2SUBCCri12; + case ARM::t2SUBrr: return ARM::t2SUBCCrr; + case ARM::t2SUBrs: return ARM::t2SUBCCrs; + } +} + +bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + // MOVCC operands: + // 0: Def. + // 1: True use. + // 2: False use. + // 3: Condition code. + // 4: CPSR use. + TrueOp = 1; + FalseOp = 2; + Cond.push_back(MI->getOperand(3)); + Cond.push_back(MI->getOperand(4)); + // We can always fold a def. + Optimizable = true; + return false; +} + +MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + bool PreferFalse) const { + assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && + "Unknown select instruction"); + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineInstr *DefMI = 0; + unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); + bool Invert = !Opc; + if (!Opc) + Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); + if (!Opc) + return 0; + + // Create a new predicated version of DefMI. + // Rfalse is the first use. + MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + get(Opc), MI->getOperand(0).getReg()) + .addOperand(MI->getOperand(Invert ? 2 : 1)); + + // Copy all the DefMI operands, excluding its (null) predicate. + const MCInstrDesc &DefDesc = DefMI->getDesc(); + for (unsigned i = 1, e = DefDesc.getNumOperands(); + i != e && !DefDesc.OpInfo[i].isPredicate(); ++i) + NewMI.addOperand(DefMI->getOperand(i)); + + unsigned CondCode = MI->getOperand(3).getImm(); + if (Invert) + NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode))); + else + NewMI.addImm(CondCode); + NewMI.addOperand(MI->getOperand(4)); + + // DefMI is not the -S version that sets CPSR, so add an optional %noreg. + if (NewMI->hasOptionalDef()) + AddDefaultCC(NewMI); + + // The caller will erase MI, but not DefMI. + DefMI->eraseFromParent(); + return NewMI; +} + /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the /// instruction is encoded with an 'S' bit is determined by the optional CPSR /// def operand. @@ -3180,11 +3357,18 @@ enum ARMExeDomain { // std::pair<uint16_t, uint16_t> ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { - // VMOVD is a VFP instruction, but can be changed to NEON if it isn't - // predicated. + // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON + // if they are not predicated. if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // Cortex-A9 is particularly picky about mixing the two and wants these + // converted. + if (Subtarget.isCortexA9() && !isPredicated(MI) && + (MI->getOpcode() == ARM::VMOVRS || + MI->getOpcode() == ARM::VMOVSR)) + return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); + // No other instructions can be swizzled, so just determine their domain. unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask; @@ -3204,22 +3388,95 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { void ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { - // We only know how to change VMOVD into VORR. - assert(MI->getOpcode() == ARM::VMOVD && "Can only swizzle VMOVD"); - if (Domain != ExeNEON) - return; + unsigned DstReg, SrcReg, DReg; + unsigned Lane; + MachineInstrBuilder MIB(MI); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + bool isKill; + switch (MI->getOpcode()) { + default: + llvm_unreachable("cannot handle opcode!"); + break; + case ARM::VMOVD: + if (Domain != ExeNEON) + break; - // Zap the predicate operands. - assert(!isPredicated(MI) && "Cannot predicate a VORRd"); - MI->RemoveOperand(3); - MI->RemoveOperand(2); + // Zap the predicate operands. + assert(!isPredicated(MI) && "Cannot predicate a VORRd"); + MI->RemoveOperand(3); + MI->RemoveOperand(2); - // Change to a VORRd which requires two identical use operands. - MI->setDesc(get(ARM::VORRd)); + // Change to a VORRd which requires two identical use operands. + MI->setDesc(get(ARM::VORRd)); + + // Add the extra source operand and new predicates. + // This will go before any implicit ops. + AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); + break; + case ARM::VMOVRS: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VGETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(SrcReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + + MI->setDesc(get(ARM::VGETLNi32)); + MIB.addReg(DReg); + MIB.addImm(Lane); + + MIB->getOperand(1).setIsUndef(); + MIB.addReg(SrcReg, RegState::Implicit); + + AddDefaultPred(MIB); + break; + case ARM::VMOVSR: + if (Domain != ExeNEON) + break; + assert(!isPredicated(MI) && "Cannot predicate a VSETLN"); + + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_0, &ARM::DPRRegClass); + Lane = 0; + if (DReg == ARM::NoRegister) { + DReg = TRI->getMatchingSuperReg(DstReg, ARM::ssub_1, &ARM::DPRRegClass); + Lane = 1; + assert(DReg && "S-register with no D super-register?"); + } + isKill = MI->getOperand(0).isKill(); + + MI->RemoveOperand(3); + MI->RemoveOperand(2); + MI->RemoveOperand(1); + MI->RemoveOperand(0); + + MI->setDesc(get(ARM::VSETLNi32)); + MIB.addReg(DReg, RegState::Define); + MIB.addReg(DReg, RegState::Undef); + MIB.addReg(SrcReg); + MIB.addImm(Lane); + + if (isKill) + MIB->addRegisterKilled(DstReg, TRI, true); + MIB->addRegisterDefined(DstReg, TRI); + + AddDefaultPred(MIB); + break; + } - // Add the extra source operand and new predicates. - // This will go before any implicit ops. - AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1))); } bool ARMBaseInstrInfo::hasNOP() const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1a10a4a..92e5ee8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -202,6 +202,13 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + virtual bool analyzeSelect(const MachineInstr *MI, + SmallVectorImpl<MachineOperand> &Cond, + unsigned &TrueOp, unsigned &FalseOp, + bool &Optimizable) const; + + virtual MachineInstr *optimizeSelect(MachineInstr *MI, bool) const; + /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, @@ -352,6 +359,11 @@ ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); int getMatchingCondBranchOpcode(int Opc); +/// Determine if MI can be folded into an ARM MOVCC instruction, and return the +/// opcode of the SSA instruction representing the conditional MI. +unsigned canFoldARMInstrIntoMOVCC(unsigned Reg, + MachineInstr *&MI, + const MachineRegisterInfo &MRI); /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether /// the instruction is encoded with an 'S' bit is determined by the optional diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 231bd26..9deb96e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -62,8 +62,20 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + bool ghcCall = false; + + if (MF) { + const Function *F = MF->getFunction(); + ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); + } + + if (ghcCall) { + return CSR_GHC_SaveList; + } + else { return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + } } const uint32_t* diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b9a2512..bda1517 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -79,6 +79,25 @@ def RetFastCC_ARM_APCS : CallingConv<[ CCDelegateTo<RetCC_ARM_APCS> ]>; +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention for GHC +//===----------------------------------------------------------------------===// + +def CC_ARM_APCS_GHC : CallingConv<[ + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, + CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>, + + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim + CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>> +]>; //===----------------------------------------------------------------------===// // ARM AAPCS (EABI) Calling Convention, common parts @@ -113,6 +132,9 @@ def RetCC_ARM_AAPCS_Common : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -138,6 +160,9 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM_AAPCS_VFP : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, @@ -171,3 +196,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; + +// GHC set of callee saved regs is empty as all those regs are +// used for passing STG regs around +// add is a workaround for not being able to compile empty list: +// def CSR_GHC : CalleeSavedRegs<()>; +def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index af260a5..132b81f 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -264,7 +264,7 @@ namespace { emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); return 0; } - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); int32_t Imm12 = MO1.getImm(); uint32_t Binary; Binary = Imm12 & 0xfff; @@ -314,18 +314,24 @@ namespace { // {7-0} = imm8 uint32_t Binary = 0; const MachineOperand &MO = MI.getOperand(Op); - uint32_t Reg = getMachineOpValue(MI, MO); - Binary |= (Reg << 9); - - // If there is a non-zero immediate offset, encode it. - if (MO.isReg()) { - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (uint32_t ImmOffs = ARM_AM::getAM5Offset(MO1.getImm())) { - if (ARM_AM::getAM5Op(MO1.getImm()) == ARM_AM::add) - Binary |= 1 << 8; - Binary |= ImmOffs & 0xff; - return Binary; - } + const MachineOperand &MO1 = MI.getOperand(Op + 1); + if (!MO.isReg()) { + emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); + return 0; + } + unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); + int32_t Imm12 = MO1.getImm(); + + // Special value for #-0 + if (Imm12 == INT32_MIN) + Imm12 = 0; + + // Immediate is always encoded as positive. The 'U' bit controls add vs + // sub. + bool isAdd = true; + if (Imm12 < 0) { + Imm12 = -Imm12; + isAdd = false; } // If immediate offset is omitted, default to +0. @@ -367,6 +373,12 @@ namespace { void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, intptr_t JTBase = 0) const; + unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; + unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; }; } @@ -455,7 +467,7 @@ unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, const MachineOperand &MO) const { if (MO.isReg()) - return getARMRegisterNumbering(MO.getReg()); + return II->getRegisterInfo().getEncodingValue(MO.getReg()); else if (MO.isImm()) return static_cast<unsigned>(MO.getImm()); else if (MO.isFPImm()) @@ -816,7 +828,7 @@ void ARMCodeEmitter::emitLEApcrelInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC. - Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement which is a so_imm. // Set bit I(25) to identify this is the immediate form of <shifter_op> @@ -844,7 +856,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; // Encode Rn which is PC. - Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; // Encode the displacement. Binary |= 1 << ARMII::I_BitShift; @@ -1045,7 +1057,7 @@ unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, if (Rs) { // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } // Encode shift_imm bit[11:7]. @@ -1101,7 +1113,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; else if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); if (MCID.Opcode == ARM::MOVi16) { // Get immediate from MI. @@ -1151,7 +1163,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (!isUnary) { if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else { Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; ++OpIdx; @@ -1168,7 +1180,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, if (MO.isReg()) { // Encode register Rm. - emitWordLE(Binary | getARMRegisterNumbering(MO.getReg())); + emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); return; } @@ -1217,14 +1229,14 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, // Set first operand if (ImplicitRd) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1251,7 +1263,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, Binary |= 1 << ARMII::I_BitShift; assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); // Set bit[3:0] to the corresponding Rm register - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); // If this instr is in scaled register offset/index instruction, set // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. @@ -1295,7 +1307,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). - Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift); + Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); else Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; @@ -1314,7 +1326,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // If this instr is in register offset/index encoding, set bit[3:0] // to the corresponding Rm register. if (MO2.getReg()) { - Binary |= getARMRegisterNumbering(MO2.getReg()); + Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); emitWordLE(Binary); return; } @@ -1385,7 +1397,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || MO.isImplicit()) break; - unsigned RegNum = getARMRegisterNumbering(MO.getReg()); + unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && RegNum < 16); Binary |= 0x1 << RegNum; @@ -1632,7 +1644,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) // The return register is LR. - Binary |= getARMRegisterNumbering(ARM::LR); + Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); else // otherwise, set the return register Binary |= getMachineOpValue(MI, 0); @@ -1640,11 +1652,12 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); if (!isSPVFP) { Binary |= (RegD & 0x0F) << ARMII::RegRdShift; Binary |= ((RegD & 0x10) >> 4) << ARMII::D_BitShift; @@ -1655,11 +1668,12 @@ static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); if (!isSPVFP) { Binary |= (RegN & 0x0F) << ARMII::RegRnShift; Binary |= ((RegN & 0x10) >> 4) << ARMII::N_BitShift; @@ -1670,11 +1684,12 @@ static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { return Binary; } -static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); if (!isSPVFP) { Binary |= (RegM & 0x0F); Binary |= ((RegM & 0x10) >> 4) << ARMII::M_BitShift; @@ -1885,28 +1900,31 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegD = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegD = getARMRegisterNumbering(RegD); + RegD = II->getRegisterInfo().getEncodingValue(RegD); Binary |= (RegD & 0xf) << ARMII::RegRdShift; Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; return Binary; } -static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegN = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegN = getARMRegisterNumbering(RegN); + RegN = II->getRegisterInfo().getEncodingValue(RegN); Binary |= (RegN & 0xf) << ARMII::RegRnShift; Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; return Binary; } -static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { +unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, + unsigned OpIdx) const { unsigned RegM = MI.getOperand(OpIdx).getReg(); unsigned Binary = 0; - RegM = getARMRegisterNumbering(RegM); + RegM = II->getRegisterInfo().getEncodingValue(RegM); Binary |= (RegM & 0xf); Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; return Binary; @@ -1940,7 +1958,7 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, RegNOpIdx); @@ -1969,7 +1987,7 @@ void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; unsigned RegT = MI.getOperand(1).getReg(); - RegT = getARMRegisterNumbering(RegT); + RegT = II->getRegisterInfo().getEncodingValue(RegT); Binary |= (RegT << ARMII::RegRdShift); Binary |= encodeNEONRn(MI, 0); emitWordLE(Binary); diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a242b13..15bb32e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1009,7 +1009,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned Lane = TRI->getEncodingValue(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index b96395f..5a5ca1b 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -87,8 +87,9 @@ class ARMFastISel : public FastISel { LLVMContext *Context; public: - explicit ARMFastISel(FunctionLoweringInfo &funcInfo) - : FastISel(funcInfo), + explicit ARMFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()) { @@ -99,51 +100,53 @@ class ARMFastISel : public FastISel { } // Code from FastISel.cpp. - virtual unsigned FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass *RC); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); - virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm); - virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); - virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2); - - virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx); + private: + unsigned FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); + unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm); + unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm); + unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + + unsigned FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx); // Backend specific FastISel code. + private: virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, const LoadInst *LI); - + private: #include "ARMGenFastISel.inc" // Instruction selection routines. @@ -167,6 +170,7 @@ class ARMFastISel : public FastISel { bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); bool SelectIntExt(const Instruction *I); + bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy); // Utility routines. private: @@ -1819,9 +1823,12 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, default: llvm_unreachable("Unsupported calling convention"); case CallingConv::Fast: - // Ignore fastcc. Silence compiler warnings. - (void)RetFastCC_ARM_APCS; - (void)FastCC_ARM_APCS; + if (Subtarget->hasVFP2() && !isVarArg) { + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); + // For AAPCS ABI targets, just use VFP variant of the calling convention. + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + } // Fallthrough case CallingConv::C: // Use target triple & subtarget features to do actual dispatch. @@ -1842,6 +1849,11 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::GHC: + if (Return) + llvm_unreachable("Can't return in GHC call convention"); + else + return CC_ARM_APCS_GHC; } } @@ -2608,6 +2620,61 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { return true; } +bool ARMFastISel::SelectShift(const Instruction *I, + ARM_AM::ShiftOpc ShiftTy) { + // We handle thumb2 mode by target independent selector + // or SelectionDAG ISel. + if (isThumb2) + return false; + + // Only handle i32 now. + EVT DestVT = TLI.getValueType(I->getType(), true); + if (DestVT != MVT::i32) + return false; + + unsigned Opc = ARM::MOVsr; + unsigned ShiftImm; + Value *Src2Value = I->getOperand(1); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) { + ShiftImm = CI->getZExtValue(); + + // Fall back to selection DAG isel if the shift amount + // is zero or greater than the width of the value type. + if (ShiftImm == 0 || ShiftImm >=32) + return false; + + Opc = ARM::MOVsi; + } + + Value *Src1Value = I->getOperand(0); + unsigned Reg1 = getRegForValue(Src1Value); + if (Reg1 == 0) return false; + + unsigned Reg2; + if (Opc == ARM::MOVsr) { + Reg2 = getRegForValue(Src2Value); + if (Reg2 == 0) return false; + } + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + if(ResultReg == 0) return false; + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Reg1); + + if (Opc == ARM::MOVsi) + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm)); + else if (Opc == ARM::MOVsr) { + MIB.addReg(Reg2); + MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0)); + } + + AddOptionalDefs(MIB); + UpdateValueMap(I, ResultReg); + return true; +} + // TODO: SoftFP support. bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { @@ -2668,6 +2735,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::ZExt: case Instruction::SExt: return SelectIntExt(I); + case Instruction::Shl: + return SelectShift(I, ARM_AM::lsl); + case Instruction::LShr: + return SelectShift(I, ARM_AM::lsr); + case Instruction::AShr: + return SelectShift(I, ARM_AM::asr); default: break; } return false; @@ -2720,14 +2793,15 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, } namespace llvm { - FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) - return new ARMFastISel(funcInfo); + return new ARMFastISel(funcInfo, libInfo); return 0; } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2629496..aee72d2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -15,6 +15,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/Function.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -151,6 +153,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, @@ -354,6 +360,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); + // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1953192..c6f9d15 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -47,11 +47,6 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); -static cl::opt<bool> -DisableARMIntABS("disable-arm-int-abs", cl::Hidden, - cl::desc("Enable / disable ARM integer abs transform"), - cl::init(false)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -244,7 +239,6 @@ private: /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectConditionalOp(SDNode *N); SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); @@ -2368,115 +2362,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } -SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) { - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); - SDValue CCR = N->getOperand(3); - assert(CCR.getOpcode() == ISD::Register); - SDValue InFlag = N->getOperand(4); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - - if (Subtarget->isThumb()) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrs; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCri; break; - case ARMISD::COR: Opc = ARM::t2ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break; - case ARMISD::COR: Opc = ARM::t2ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsi; break; - case ARMISD::COR: Opc = ARM::ORRCCrsi; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsi; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrsr; break; - case ARMISD::COR: Opc = ARM::ORRCCrsr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrsr; break; - } - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8); - } - - ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal); - if (T) { - unsigned TrueImm = T->getZExtValue(); - if (is_so_imm(TrueImm)) { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCri; break; - case ARMISD::COR: Opc = ARM::ORRCCri; break; - case ARMISD::CXOR: Opc = ARM::EORCCri; break; - } - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); - } - } - - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ARMISD::CAND: Opc = ARM::ANDCCrr; break; - case ARMISD::COR: Opc = ARM::ORRCCrr; break; - case ARMISD::CXOR: Opc = ARM::EORCCrr; break; - } - SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2492,14 +2377,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ SDValue XORSrc1 = N->getOperand(1); EVT VT = N->getValueType(0); - if (DisableARMIntABS) - return NULL; - if (Subtarget->isThumb1Only()) return NULL; - if (XORSrc0.getOpcode() != ISD::ADD || - XORSrc1.getOpcode() != ISD::SRA) + if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) return NULL; SDValue ADDSrc0 = XORSrc0.getOperand(0); @@ -2510,16 +2391,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT XType = SRASrc0.getValueType(); unsigned Size = XType.getSizeInBits() - 1; - if (ADDSrc1 == XORSrc1 && - ADDSrc0 == SRASrc0 && - XType.isInteger() && - SRAConstant != NULL && + if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && + XType.isInteger() && SRAConstant != NULL && Size == SRAConstant->getZExtValue()) { - - unsigned Opcode = ARM::ABS; - if (Subtarget->isThumb2()) - Opcode = ARM::t2ABS; - + unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } @@ -2814,10 +2689,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CAND: - case ARMISD::COR: - case ARMISD::CXOR: - return SelectConditionalOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 04370c0..df4039b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -90,75 +90,70 @@ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, - EVT PromotedBitwiseVT) { +void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT, + MVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT); - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::STORE, VT.getSimpleVT(), - PromotedLdStVT.getSimpleVT()); + setOperationAction(ISD::STORE, VT, Promote); + AddPromotedToType (ISD::STORE, VT, PromotedLdStVT); } - EVT ElemTy = VT.getVectorElementType(); + MVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); if (ElemTy == MVT::i32) { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SINT_TO_FP, VT, Custom); + setOperationAction(ISD::UINT_TO_FP, VT, Custom); + setOperationAction(ISD::FP_TO_SINT, VT, Custom); + setOperationAction(ISD::FP_TO_UINT, VT, Custom); } else { - setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); - } - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SINT_TO_FP, VT, Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); + setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (VT.isInteger()) { - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::SRL, VT, Custom); } // Promote all bit-wise operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { - setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::AND, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::OR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); - setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); - AddPromotedToType (ISD::XOR, VT.getSimpleVT(), - PromotedBitwiseVT.getSimpleVT()); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT); } // Neon does not support vector divide/remainder operations. - setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::FREM, VT, Expand); } -void ARMTargetLowering::addDRTypeForNEON(EVT VT) { +void ARMTargetLowering::addDRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::DPRRegClass); addTypeForNEON(VT, MVT::f64, MVT::v2i32); } -void ARMTargetLowering::addQRTypeForNEON(EVT VT) { +void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addRegisterClass(VT, &ARM::QPRRegClass); addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } @@ -903,9 +898,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CAND: return "ARMISD::CAND"; - case ARMISD::COR: return "ARMISD::COR"; - case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -1041,8 +1033,9 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Create a fast isel object. FastISel * -ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return ARM::createFastISel(funcInfo); +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return ARM::createFastISel(funcInfo, libInfo); } /// getMaximalGlobalOffset - Returns the maximal possible offset which can @@ -1171,6 +1164,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::GHC: + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } } @@ -4271,6 +4266,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // Record this extraction against the appropriate vector if possible... SDValue SourceVec = V.getOperand(0); + // If the element number isn't a constant, we can't effectively + // analyze what's going on. + if (!isa<ConstantSDNode>(V.getOperand(1))) + return SDValue(); unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); bool FoundSource = false; for (unsigned j = 0; j < SourceVecs.size(); ++j) { @@ -6152,13 +6151,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // Add the jump table entries as successors to the MBB. - MachineBasicBlock *PrevMBB = 0; + SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (PrevMBB != CurMBB) + if (SeenMBBs.insert(CurMBB)) DispContBB->addSuccessor(CurMBB); - PrevMBB = CurMBB; } // N.B. the order the invoke BBs are processed in doesn't matter here. @@ -6971,62 +6969,137 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // ARM Optimization Hooks //===----------------------------------------------------------------------===// +// Helper function that checks if N is a null or all ones constant. +static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + if (!C) + return false; + return AllOnes ? C->isAllOnesValue() : C->isNullValue(); +} + +// Return true if N is conditionally 0 or all ones. +// Detects these expressions where cc is an i1 value: +// +// (select cc 0, y) [AllOnes=0] +// (select cc y, 0) [AllOnes=0] +// (zext cc) [AllOnes=0] +// (sext cc) [AllOnes=0/1] +// (select cc -1, y) [AllOnes=1] +// (select cc y, -1) [AllOnes=1] +// +// Invert is set when N is the null/all ones constant when CC is false. +// OtherOp is set to the alternative value of N. +static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, + SDValue &CC, bool &Invert, + SDValue &OtherOp, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: return false; + case ISD::SELECT: { + CC = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + if (isZeroOrAllOnes(N1, AllOnes)) { + Invert = false; + OtherOp = N2; + return true; + } + if (isZeroOrAllOnes(N2, AllOnes)) { + Invert = true; + OtherOp = N1; + return true; + } + return false; + } + case ISD::ZERO_EXTEND: + // (zext cc) can never be the all ones value. + if (AllOnes) + return false; + // Fall through. + case ISD::SIGN_EXTEND: { + EVT VT = N->getValueType(0); + CC = N->getOperand(0); + if (CC.getValueType() != MVT::i1) + return false; + Invert = !AllOnes; + if (AllOnes) + // When looking for an AllOnes constant, N is an sext, and the 'other' + // value is 0. + OtherOp = DAG.getConstant(0, VT); + else if (N->getOpcode() == ISD::ZERO_EXTEND) + // When looking for a 0 constant, N can be zext or sext. + OtherOp = DAG.getConstant(1, VT); + else + OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); + return true; + } + } +} + +// Combine a constant select operand into its use: +// +// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) +// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) +// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] +// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) +// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) +// +// The transform is rejected if the select doesn't have a constant operand that +// is null, or all ones when AllOnes is set. +// +// Also recognize sext/zext from i1: +// +// (add (zext cc), x) -> (select cc (add x, 1), x) +// (add (sext cc), x) -> (select cc (add x, -1), x) +// +// These transformations eventually create predicated instructions. +// +// @param N The node to transform. +// @param Slct The N operand that is a select. +// @param OtherOp The other N operand (x above). +// @param DCI Context. +// @param AllOnes Require the select constant to be all ones instead of null. +// @returns The new node, or SDValue() on failure. static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + bool AllOnes = false) { SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = N->getValueType(0); - unsigned Opc = N->getOpcode(); - bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; - SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); - SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); - ISD::CondCode CC = ISD::SETCC_INVALID; - - if (isSlctCC) { - CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); - } else { - SDValue CCOp = Slct.getOperand(0); - if (CCOp.getOpcode() == ISD::SETCC) - CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); - } - - bool DoXform = false; - bool InvCC = false; - assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && - "Bad input!"); - - if (LHS.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(LHS)->isNullValue()) { - DoXform = true; - } else if (CC != ISD::SETCC_INVALID && - RHS.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(RHS)->isNullValue()) { - std::swap(LHS, RHS); - SDValue Op0 = Slct.getOperand(0); - EVT OpVT = isSlctCC ? Op0.getValueType() : - Op0.getOperand(0).getValueType(); - bool isInt = OpVT.isInteger(); - CC = ISD::getSetCCInverse(CC, isInt); - - if (!TLI.isCondCodeLegal(CC, OpVT)) - return SDValue(); // Inverse operator isn't legal. - - DoXform = true; - InvCC = true; - } - - if (DoXform) { - SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); - if (isSlctCC) - return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, - Slct.getOperand(0), Slct.getOperand(1), CC); - SDValue CCOp = Slct.getOperand(0); - if (InvCC) - CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), - CCOp.getOperand(0), CCOp.getOperand(1), CC); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - CCOp, OtherOp, Result); + SDValue NonConstantVal; + SDValue CCOp; + bool SwapSelectOps; + if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps, + NonConstantVal, DAG)) + return SDValue(); + + // Slct is now know to be the desired identity constant when CC is true. + SDValue TrueVal = OtherOp; + SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + OtherOp, NonConstantVal); + // Unless SwapSelectOps says CC should be false. + if (SwapSelectOps) + std::swap(TrueVal, FalseVal); + + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, TrueVal, FalseVal); +} + +// Attempt combineSelectAndUse on each operand of a commutative operator N. +static +SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes, + TargetLowering::DAGCombinerInfo &DCI) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes); + if (Result.getNode()) + return Result; + } + if (N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes); + if (Result.getNode()) + return Result; } return SDValue(); } @@ -7134,7 +7207,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return Result; // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) - if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + if (N0.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N0, N1, DCI); if (Result.getNode()) return Result; } @@ -7166,7 +7239,7 @@ static SDValue PerformSUBCombine(SDNode *N, SDValue N1 = N->getOperand(1); // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) - if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + if (N1.getNode()->hasOneUse()) { SDValue Result = combineSelectAndUse(N, N1, N0, DCI); if (Result.getNode()) return Result; } @@ -7294,49 +7367,6 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } -static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { - if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) - return false; - - SDValue FalseVal = N.getOperand(0); - ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); - if (!C) - return false; - if (AllOnes) - return C->isAllOnesValue(); - return C->isNullValue(); -} - -/// formConditionalOp - Combine an operation with a conditional move operand -/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) -/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) -static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, - bool Commutable) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - bool isAND = N->getOpcode() == ISD::AND; - bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); - if (!isCand && Commutable) { - isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); - if (isCand) - std::swap(N0, N1); - } - if (!isCand) - return SDValue(); - - unsigned Opc = 0; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected node"); - case ISD::AND: Opc = ARMISD::CAND; break; - case ISD::OR: Opc = ARMISD::COR; break; - case ISD::XOR: Opc = ARMISD::CXOR; break; - } - return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, - N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), - N1.getOperand(4)); -} - static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -7371,10 +7401,10 @@ static SDValue PerformANDCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (and x, (cmov -1, y, cond)) => (and.cond x, y) - SDValue CAND = formConditionalOp(N, DAG, true); - if (CAND.getNode()) - return CAND; + // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, true, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -7414,14 +7444,17 @@ static SDValue PerformORCombine(SDNode *N, } if (!Subtarget->isThumb1Only()) { - // (or x, (cmov 0, y, cond)) => (or.cond x, y) - SDValue COR = formConditionalOp(N, DAG, true); - if (COR.getNode()) - return COR; + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) return SDValue(); SDValue N1 = N->getOperand(1); @@ -7578,10 +7611,10 @@ static SDValue PerformXORCombine(SDNode *N, return SDValue(); if (!Subtarget->isThumb1Only()) { - // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) - SDValue CXOR = formConditionalOp(N, DAG, true); - if (CXOR.getNode()) - return CXOR; + // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) + SDValue Result = combineSelectAndUseCommutative(N, false, DCI); + if (Result.getNode()) + return Result; } return SDValue(); @@ -8802,6 +8835,8 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i16: case MVT::i32: return true; + case MVT::f64: + return Subtarget->hasNEON(); // FIXME: VLD1 etc with standard alignment is legal. } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7ad48b9..13b83de 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -63,9 +63,6 @@ namespace llvm { FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CAND, // ARM conditional and instructions. - COR, // ARM conditional or instructions. - CXOR, // ARM conditional xor instructions. BCC_i64, @@ -361,7 +358,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; Sched::Preference getSchedulingPreference(SDNode *N) const; @@ -393,9 +391,9 @@ namespace llvm { /// unsigned ARMPCLabelIndex; - void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT); - void addDRTypeForNEON(EVT VT); - void addQRTypeForNEON(EVT VT); + void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -544,7 +542,8 @@ namespace llvm { namespace ARM { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1b8fc3f..992aba5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -242,6 +242,9 @@ def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +def IsLE : Predicate<"TLI.isLittleEndian()">; +def IsBE : Predicate<"TLI.isBigEndian()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -416,8 +419,11 @@ def pclabel : Operand<i32> { } // ADR instruction labels. +def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand<i32> { let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrLabelAsmOperand; + let PrintMethod = "printAdrLabelOperand"; } def neon_vcvt_imm32 : Operand<i32> { @@ -968,7 +974,7 @@ include "ARMInstrFormats.td" let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1037,7 +1043,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_rbin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> { + PatFrag opnode, bit Commutable = 0> { // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { @@ -1285,7 +1291,7 @@ class AI_exta_rrot_np<bits<8> opcod, string opc> /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc, bit Commutable = 0> { + bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -1351,8 +1357,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// AI1_rsc_irs - Define instructions and patterns for rsc let TwoOperandAliasConstraint = "$Rn = $Rd" in -multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode, - string baseOpc> { +multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", @@ -2816,9 +2821,6 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } -def : ARMInstAlias<"movs${p} $Rd, $Rm", - (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; - // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -3029,10 +3031,10 @@ def UBFX : I<(outs GPR:$Rd), defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; defm SUB : AsI1_bin_irs<0b0010, "sub", IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">; + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. // @@ -3050,15 +3052,13 @@ defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, - "ADC", 1>; + BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "SBC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; -defm RSB : AsI1_rbin_irs <0b0011, "rsb", - IIC_iALUi, IIC_iALUr, IIC_iALUsr, - BinOpFrag<(sub node:$LHS, node:$RHS)>, "RSB">; +defm RSB : AsI1_rbin_irs<0b0011, "rsb", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>>; // FIXME: Eliminate them if we can write def : Pat patterns which defines // CPSR and the implicit def of CPSR is not needed. @@ -3066,8 +3066,7 @@ defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>; defm RSC : AI1_rsc_irs<0b0111, "rsc", - BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>, - "RSC">; + BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>>; // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. // The assume-no-carry-in form uses the negation of the input since add/sub @@ -3276,16 +3275,16 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos), defm AND : AsI1_bin_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm ORR : AsI1_bin_irs<0b1100, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm EOR : AsI1_bin_irs<0b0001, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm BIC : AsI1_bin_irs<0b1110, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsr, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; // FIXME: bf_inv_mask_imm should be two operands, the lsb and the msb, just // like in the actual instruction encoding. The complexity of mapping the mask @@ -3940,7 +3939,7 @@ def BCCZi64 : PseudoInst<(outs), // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), 4, IIC_iCMOVr, [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, @@ -3993,25 +3992,29 @@ multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { def ri : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rr : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, GPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsi : ARMPseudoExpand<(outs GPR:$Rd), - (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s), + (ins GPR:$Rfalse, GPR:$Rn, so_reg_imm:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd), - (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, so_reg_reg:$shift, + pred:$p, cc_out:$s), 4, iis, [], (irsr GPR:$Rd, GPR:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr, @@ -4020,6 +4023,10 @@ defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr, IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm ADDCC : AsI1_bincc_irs<ADDri, ADDrr, ADDrsi, ADDrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; +defm SUBCC : AsI1_bincc_irs<SUBri, SUBrr, SUBrsi, SUBrsr, + IIC_iBITi, IIC_iBITr, IIC_iBITsr>; } // neverHasSideEffects @@ -4068,11 +4075,8 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in { -def ABS : ARMPseudoInst< - (outs GPR:$dst), (ins GPR:$src), - 8, NoItinerary, []>; -} +let usesCustomInserter = 1, Defs = [CPSR] in +def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; let usesCustomInserter = 1 in { let Defs = [CPSR] in { diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3134088..048d340 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -398,6 +398,27 @@ def VecListFourQWordIndexed : Operand<i32> { let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } +def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 2; +}]>; +def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 2; +}]>; +def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() == 1; +}]>; +def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() == 1; +}]>; +def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -2238,6 +2259,19 @@ def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +// Use vld1/vst1 for unaligned f64 load / store +def : Pat<(f64 (hword_alignedload addrmode6:$addr)), + (VLD1d16 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d16 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (byte_alignedload addrmode6:$addr)), + (VLD1d8 addrmode6:$addr)>, Requires<[IsLE]>; +def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d8 addrmode6:$addr, DPR:$value)>, Requires<[IsLE]>; +def : Pat<(f64 (non_word_alignedload addrmode6:$addr)), + (VLD1d64 addrmode6:$addr)>, Requires<[IsBE]>; +def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr), + (VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>; //===----------------------------------------------------------------------===// // NEON pattern fragments diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d83530a..8ecf009 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -172,6 +172,7 @@ def t2ldr_pcrel_imm12 : Operand<i32> { // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand"; } @@ -529,7 +530,7 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4, /// changed to modify CPSR. multiclass T2I_bin_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0, + PatFrag opnode, bit Commutable = 0, string wide = ""> { // shifted imm def ri : T2sTwoRegImm< @@ -565,15 +566,15 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // Assembly aliases for optional destination operand when it's the same // as the source operand. def : t2InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, pred:$p, cc_out:$s)>; } @@ -582,36 +583,30 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, // the ".w" suffix to indicate that they are wide. multiclass T2I_bin_w_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc, bit Commutable = 0> : - T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> { + PatFrag opnode, bit Commutable = 0> : + T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w"> { // Assembler aliases w/ the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rd, rGPR:$Rn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rd, rGPR:$Rn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_imm:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, + pred:$p, cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn, - t2_so_reg:$shift, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$shift, + pred:$p, cc_out:$s)>; } /// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are @@ -762,6 +757,33 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, let Inst{24} = 1; let Inst{23-21} = op23_21; } + + // Predicated versions. + def CCri : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCri12 : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPR:$Rn, imm0_4095:$imm, + pred:$p), + 4, IIC_iALUi, [], + (!cast<Instruction>(NAME#ri12) GPRnopc:$Rd, + GPR:$Rn, imm0_4095:$imm, pred:$p)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrr : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUr, [], + (!cast<Instruction>(NAME#rr) GPRnopc:$Rd, + GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; + def CCrs : t2PseudoExpand<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rfalse, GPRnopc:$Rn, t2_so_reg:$Rm, + pred:$p, cc_out:$s), 4, IIC_iALUsi, [], + (!cast<Instruction>(NAME#rs) GPRnopc:$Rd, + GPRnopc:$Rn, t2_so_reg:$Rm, pred:$p, cc_out:$s)>, + RegConstraint<"$Rfalse = $Rd">; } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns @@ -808,8 +830,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift / // rotate operation that produces a value. -multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, - string baseOpc> { +multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode> { // 5-bit imm def ri : T2sTwoRegShiftImm< (outs rGPR:$Rd), (ins rGPR:$Rm, ty:$imm), IIC_iMOVsi, @@ -834,33 +855,27 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, // Optional destination register def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // Assembler aliases w/o the ".w" suffix. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rd, rGPR:$Rn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, + cc_out:$s)>; // and with the optional destination operand, too. def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn, - ty:$imm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, ty:$imm, pred:$p, + cc_out:$s)>; def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"), - (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn, - rGPR:$Rm, pred:$p, - cc_out:$s)>; + (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, + cc_out:$s)>; } /// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test @@ -868,7 +883,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode, /// a explicit result, only implicitly set CPSR. multiclass T2I_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, - PatFrag opnode, string baseOpc> { + PatFrag opnode> { let isCompare = 1, Defs = [CPSR] in { // shifted imm def ri : T2OneRegCmpImm< @@ -913,12 +928,9 @@ let isCompare = 1, Defs = [CPSR] in { // No alias here for 'rr' version as not all instantiations of this // multiclass want one (CMP in particular, does not). def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $imm"), - (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; + (!cast<Instruction>(NAME#"ri") GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; def : t2InstAlias<!strconcat(opc, "${p}", " $Rn, $shift"), - (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; + (!cast<Instruction>(NAME#"rs") GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; } /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. @@ -2152,13 +2164,13 @@ def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; // defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, - BinOpFrag<(shl node:$LHS, node:$RHS)>, "t2LSL">; + BinOpFrag<(shl node:$LHS, node:$RHS)>>; defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, - BinOpFrag<(srl node:$LHS, node:$RHS)>, "t2LSR">; + BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, - BinOpFrag<(sra node:$LHS, node:$RHS)>, "t2ASR">; + BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, - BinOpFrag<(rotr node:$LHS, node:$RHS)>, "t2ROR">; + BinOpFrag<(rotr node:$LHS, node:$RHS)>>; // (rotr x, (and y, 0x...1f)) ==> (ROR x, y) def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), @@ -2214,18 +2226,17 @@ def t2MOVsra_flag : T2TwoRegShiftImm< defm t2AND : T2I_bin_w_irs<0b0000, "and", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>; + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; defm t2ORR : T2I_bin_w_irs<0b0010, "orr", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>; + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; defm t2EOR : T2I_bin_w_irs<0b0100, "eor", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>; + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; defm t2BIC : T2I_bin_w_irs<0b0001, "bic", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(and node:$LHS, (not node:$RHS))>, - "t2BIC">; + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; class T2BitFI<dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> @@ -2305,8 +2316,7 @@ let Constraints = "$src = $Rd" in { defm t2ORN : T2I_bin_irs<0b0011, "orn", IIC_iBITi, IIC_iBITr, IIC_iBITsi, - BinOpFrag<(or node:$LHS, (not node:$RHS))>, - "t2ORN", 0, "">; + BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">; /// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a /// unary operation that produces a value. These are predicable and can be @@ -2878,7 +2888,7 @@ def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), // defm t2CMP : T2I_cmp_irs<0b1101, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi, - BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>, "t2CMP">; + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; def : T2Pat<(ARMcmpZ GPRnopc:$lhs, t2_so_imm:$imm), (t2CMPri GPRnopc:$lhs, t2_so_imm:$imm)>; @@ -2932,13 +2942,10 @@ let isCompare = 1, Defs = [CPSR] in { // Assembler aliases w/o the ".w" suffix. // No alias here for 'rr' version as not all instantiations of this multiclass // want one (CMP in particular, does not). -def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $imm"), - (!cast<Instruction>(!strconcat("t2CMN", "ri")) GPRnopc:$Rn, - t2_so_imm:$imm, pred:$p)>; -def : t2InstAlias<!strconcat("cmn", "${p}", " $Rn, $shift"), - (!cast<Instruction>(!strconcat("t2CMNz", "rs")) GPRnopc:$Rn, - t2_so_reg:$shift, - pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $imm", + (t2CMNri GPRnopc:$Rn, t2_so_imm:$imm, pred:$p)>; +def : t2InstAlias<"cmn${p} $Rn, $shift", + (t2CMNzrs GPRnopc:$Rn, t2_so_reg:$shift, pred:$p)>; def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm), (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>; @@ -2948,19 +2955,17 @@ def : T2Pat<(ARMcmpZ GPRnopc:$src, t2_so_imm_neg:$imm), defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, - "t2TST">; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, - "t2TEQ">; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( let neverHasSideEffects = 1 in { -let isCommutable = 1 in +let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm, pred:$p), 4, IIC_iCMOVr, @@ -3048,22 +3053,25 @@ multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> { // shifted imm def ri : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_imm:$imm, + pred:$p, cc_out:$s), 4, iii, [], (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // register def rr : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, rGPR:$Rm, + pred:$p, cc_out:$s), 4, iir, [], (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; // shifted register def rs : t2PseudoExpand<(outs rGPR:$Rd), - (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s), + (ins rGPR:$Rfalse, rGPR:$Rn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s), 4, iis, [], (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>, - RegConstraint<"$Rn = $Rd">; + RegConstraint<"$Rfalse = $Rd">; } // T2I_bincc_irs defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 23c132e..7d6692f 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -61,6 +61,15 @@ def vfp_f64imm : Operand<f64>, let ParserMatchClass = FPImmOperand; } +def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; + +def alignedstore32 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + // The VCVT to/from fixed-point instructions encode the 'fbits' operand // (the number of fixed bits) differently than it appears in the assembly // source. It's encoded as "Size - fbits" where Size is the size of the @@ -86,7 +95,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", "\t$Dd, $addr", - [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; + [(set DPR:$Dd, (f64 (alignedload32 addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", @@ -100,7 +109,7 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), IIC_fpStore64, "vstr", "\t$Dd, $addr", - [(store (f64 DPR:$Dd), addrmode5:$addr)]>; + [(alignedstore32 (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", @@ -433,25 +442,25 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. // FIXME: Verify encoding after integrated assembler is working. -def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f32_to_f16 SPR:$a), - (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; - -def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def : ARMPat<(f16_to_f32 GPR:$a), - (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; +def : Pat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; + +def : Pat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index c5db211..357fc3f 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -291,9 +291,9 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) ResultPtr = ResultPtr >> 2; *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC. - *((intptr_t*)RelocPos) |= - getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + // Set register Rn to PC (which is register 15 on all architectures). + // FIXME: This avoids the need for register info in the JIT class. + *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; break; } case ARM::reloc_arm_so_imm_cp_entry: { diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb1b2a2..897ceb6 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -456,8 +456,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, DebugLoc dl = Loc->getDebugLoc(); const MachineOperand &PMO = Loc->getOperand(0); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(PReg); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; @@ -483,8 +482,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, int NewOffset = MemOps[i].Offset; const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX - : getARMRegisterNumbering(Reg); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); // Register numbers must be in ascending order. For VFP / NEON load and // store multiples, the registers must also be consecutive and within the // limit on the number of registers per instruction. diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 3857647..6f974fd 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// // Registers are identified with 4-bit ID numbers. -class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { - field bits<4> Num; +class ARMReg<bits<16> Enc, string n, list<Register> subregs = []> : Register<n> { + let HWEncoding = Enc; let Namespace = "ARM"; let SubRegs = subregs; // All bits of ARM registers with sub-registers are covered by sub-registers. let CoveredBySubRegs = 1; } -class ARMFReg<bits<6> num, string n> : Register<n> { - field bits<6> Num; +class ARMFReg<bits<16> Enc, string n> : Register<n> { + let HWEncoding = Enc; let Namespace = "ARM"; } diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index 56197d4..2c63825 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -1069,6 +1069,7 @@ def CortexA8Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let MispredictPenalty = 13; // Based on estimate of pipeline depth. let Itineraries = CortexA8Itineraries; } diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 738974e..7bc590f 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1886,6 +1886,7 @@ def CortexA9Model : SchedMachineModel { let LoadLatency = 2; // Optimistic load latency assuming bypass. // This is overriden by OperandCycles if the // Itineraries are queried instead. + let MispredictPenalty = 8; // Based on estimate of pipeline depth. let Itineraries = CortexA9Itineraries; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e067a9f..89e29ad 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -97,6 +97,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, if (!HasV6T2Ops && hasThumb2()) HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true; + // Keep a pointer to static instruction cost data for the specified CPU. + SchedModel = getSchedModelForCPU(CPUString); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); @@ -179,15 +182,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, } unsigned ARMSubtarget::getMispredictionPenalty() const { - // If we have a reasonable estimate of the pipeline depth, then we can - // estimate the penalty of a misprediction based on that. - if (isCortexA8()) - return 13; - else if (isCortexA9()) - return 8; - - // Otherwise, just return a sensible default. - return 10; + return SchedModel->MispredictPenalty; } bool ARMSubtarget::enablePostRAScheduler( diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e72b06f..b394061 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -74,7 +74,7 @@ protected: /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2; - /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - + /// IsMClass - True if the subtarget belongs to the 'M' profile of CPUs - /// v6m, v7m for example. bool IsMClass; @@ -155,6 +155,9 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + /// SchedModel - Processor specific instruction costs. + const MCSchedModel *SchedModel; + /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 4497720..3a5957b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -796,6 +796,13 @@ public: int64_t Value = CE->getValue(); return Value > 0 && Value <= 32; } + bool isAdrLabel() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, but it can't fit + // into shift immediate encoding, we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) return true; + else return (isARMSOImm() || isARMSOImmNeg()); + } bool isARMSOImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1033,7 +1040,8 @@ public: // Immediate offset a multiple of 4 in range [-1020, 1020]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return Val >= -1020 && Val <= 1020 && (Val & 3) == 0; + // Special case, #-0 is INT32_MIN. + return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; } bool isMemImm0_1020s4Offset() const { if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1644,6 +1652,22 @@ public: Inst.addOperand(MCOperand::CreateImm(Imm)); } + void addAdrLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + assert(isImm() && "Not an immediate!"); + + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. + if (!isa<MCConstantExpr>(getImm())) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + return; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + int Val = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm(Val)); + } + void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); @@ -2884,7 +2908,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(EndReg)) return Error(EndLoc, "invalid register in register list"); // Ranges must go from low to high. - if (getARMRegisterNumbering(Reg) > getARMRegisterNumbering(EndReg)) + if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) return Error(EndLoc, "bad range in register list"); // Add all the registers in the range to the register list. @@ -2911,13 +2935,13 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); // List must be monotonically increasing. - if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) < MRI->getEncodingValue(OldReg)) { if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) Warning(RegLoc, "register list not in ascending order"); else return Error(RegLoc, "register list not in ascending order"); } - if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) { + if (MRI->getEncodingValue(Reg) == MRI->getEncodingValue(OldReg)) { Warning(RegLoc, "duplicated register (" + RegTok.getString() + ") in register list"); continue; @@ -3256,29 +3280,59 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); - if (!Tok.is(AsmToken::Identifier)) - return MatchOperand_NoMatch; - StringRef OptStr = Tok.getString(); - - unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower()) - .Case("sy", ARM_MB::SY) - .Case("st", ARM_MB::ST) - .Case("sh", ARM_MB::ISH) - .Case("ish", ARM_MB::ISH) - .Case("shst", ARM_MB::ISHST) - .Case("ishst", ARM_MB::ISHST) - .Case("nsh", ARM_MB::NSH) - .Case("un", ARM_MB::NSH) - .Case("nshst", ARM_MB::NSHST) - .Case("unst", ARM_MB::NSHST) - .Case("osh", ARM_MB::OSH) - .Case("oshst", ARM_MB::OSHST) - .Default(~0U); + unsigned Opt; + + if (Tok.is(AsmToken::Identifier)) { + StringRef OptStr = Tok.getString(); + + Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower()) + .Case("sy", ARM_MB::SY) + .Case("st", ARM_MB::ST) + .Case("sh", ARM_MB::ISH) + .Case("ish", ARM_MB::ISH) + .Case("shst", ARM_MB::ISHST) + .Case("ishst", ARM_MB::ISHST) + .Case("nsh", ARM_MB::NSH) + .Case("un", ARM_MB::NSH) + .Case("nshst", ARM_MB::NSHST) + .Case("unst", ARM_MB::NSHST) + .Case("osh", ARM_MB::OSH) + .Case("oshst", ARM_MB::OSHST) + .Default(~0U); - if (Opt == ~0U) - return MatchOperand_NoMatch; + if (Opt == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + } else if (Tok.is(AsmToken::Hash) || + Tok.is(AsmToken::Dollar) || + Tok.is(AsmToken::Integer)) { + if (Parser.getTok().isNot(AsmToken::Integer)) + Parser.Lex(); // Eat the '#'. + SMLoc Loc = Parser.getTok().getLoc(); + + const MCExpr *MemBarrierID; + if (getParser().ParseExpression(MemBarrierID)) { + Error(Loc, "illegal expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID); + if (!CE) { + Error(Loc, "constant expression expected"); + return MatchOperand_ParseFail; + } + + int Val = CE->getValue(); + if (Val & ~0xf) { + Error(Loc, "immediate value out of range"); + return MatchOperand_ParseFail; + } + + Opt = ARM_MB::RESERVED_0 + Val; + } else + return MatchOperand_ParseFail; - Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S)); return MatchOperand_Success; } @@ -5250,8 +5304,8 @@ validateInstruction(MCInst &Inst, case ARM::LDRD_POST: case ARM::LDREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "destination operands must be sequential"); @@ -5259,8 +5313,8 @@ validateInstruction(MCInst &Inst, } case ARM::STRD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(0).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(1).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(1).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); @@ -5270,8 +5324,8 @@ validateInstruction(MCInst &Inst, case ARM::STRD_POST: case ARM::STREXD: { // Rt2 must be Rt + 1. - unsigned Rt = getARMRegisterNumbering(Inst.getOperand(1).getReg()); - unsigned Rt2 = getARMRegisterNumbering(Inst.getOperand(2).getReg()); + unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + unsigned Rt2 = MRI->getEncodingValue(Inst.getOperand(2).getReg()); if (Rt2 != Rt + 1) return Error(Operands[3]->getStartLoc(), "source operands must be sequential"); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47cca2a..c90751d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,10 +18,12 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -383,7 +385,6 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -#include "ARMGenInstrInfo.inc" #include "ARMGenEDInfo.inc" static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { @@ -427,7 +428,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, (bytes[0] << 0); // Calling the auto-generated decoder function. - DecodeStatus result = decodeARMInstruction32(MI, insn, Address, this, STI); + DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; return result; @@ -436,14 +438,15 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. MI.clear(); - result = decodeVFPInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; return result; } MI.clear(); - result = decodeNEONDataInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -454,7 +457,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONLoadStoreInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -465,7 +469,8 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONDupInstruction32(MI, insn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction @@ -765,7 +770,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } uint16_t insn16 = (bytes[1] << 8) | bytes[0]; - DecodeStatus result = decodeThumbInstruction16(MI, insn16, Address, this, STI); + DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 2; Check(result, AddThumbPredicate(MI)); @@ -773,7 +779,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumbSBitInstruction16(MI, insn16, Address, this, STI); + result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16, + Address, this, STI); if (result) { Size = 2; bool InITBlock = ITBlock.instrInITBlock(); @@ -783,7 +790,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumb2Instruction16(MI, insn16, Address, this, STI); + result = decodeInstruction(DecoderTableThumb216, MI, insn16, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 2; @@ -818,7 +826,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, (bytes[1] << 24) | (bytes[0] << 16); MI.clear(); - result = decodeThumbInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; bool InITBlock = ITBlock.instrInITBlock(); @@ -828,7 +837,8 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeThumb2Instruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -836,7 +846,7 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeVFPInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; UpdateThumbVFPPredicate(MI); @@ -844,19 +854,21 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } MI.clear(); - result = decodeNEONDupInstruction32(MI, insn32, Address, this, STI); + result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, + this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); return result; } - if (fieldFromInstruction32(insn32, 24, 8) == 0xF9) { + if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { MI.clear(); uint32_t NEONLdStInsn = insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; - result = decodeNEONLoadStoreInstruction32(MI, NEONLdStInsn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -864,13 +876,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - if (fieldFromInstruction32(insn32, 24, 4) == 0xF) { + if (fieldFromInstruction(insn32, 24, 4) == 0xF) { MI.clear(); uint32_t NEONDataInsn = insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONDataInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeNEONDataInstruction32(MI, NEONDataInsn, Address, this, STI); + result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, + Address, this, STI); if (result != MCDisassembler::Fail) { Size = 4; Check(result, AddThumbPredicate(MI)); @@ -1117,9 +1130,9 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned type = fieldFromInstruction32(Val, 5, 2); - unsigned imm = fieldFromInstruction32(Val, 7, 5); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned type = fieldFromInstruction(Val, 5, 2); + unsigned imm = fieldFromInstruction(Val, 7, 5); // Register-immediate if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) @@ -1154,9 +1167,9 @@ static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned type = fieldFromInstruction32(Val, 5, 2); - unsigned Rs = fieldFromInstruction32(Val, 8, 4); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned type = fieldFromInstruction(Val, 5, 2); + unsigned Rs = fieldFromInstruction(Val, 8, 4); // Register-register if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) @@ -1224,8 +1237,8 @@ static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Vd = fieldFromInstruction32(Val, 8, 5); - unsigned regs = fieldFromInstruction32(Val, 0, 8); + unsigned Vd = fieldFromInstruction(Val, 8, 5); + unsigned regs = fieldFromInstruction(Val, 0, 8); if (!Check(S, DecodeSPRRegisterClass(Inst, Vd, Address, Decoder))) return MCDisassembler::Fail; @@ -1241,8 +1254,8 @@ static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Vd = fieldFromInstruction32(Val, 8, 5); - unsigned regs = fieldFromInstruction32(Val, 0, 8); + unsigned Vd = fieldFromInstruction(Val, 8, 5); + unsigned regs = fieldFromInstruction(Val, 0, 8); regs = regs >> 1; @@ -1263,8 +1276,8 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val, // the mask of all bits LSB-and-lower, and then xor them to create // the mask of that's all ones on [msb, lsb]. Finally we not it to // create the final mask. - unsigned msb = fieldFromInstruction32(Val, 5, 5); - unsigned lsb = fieldFromInstruction32(Val, 0, 5); + unsigned msb = fieldFromInstruction(Val, 5, 5); + unsigned lsb = fieldFromInstruction(Val, 0, 5); DecodeStatus S = MCDisassembler::Success; if (lsb > msb) Check(S, MCDisassembler::SoftFail); @@ -1281,12 +1294,12 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned CRd = fieldFromInstruction32(Insn, 12, 4); - unsigned coproc = fieldFromInstruction32(Insn, 8, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 8); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned U = fieldFromInstruction32(Insn, 23, 1); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned CRd = fieldFromInstruction(Insn, 12, 4); + unsigned coproc = fieldFromInstruction(Insn, 8, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 8); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned U = fieldFromInstruction(Insn, 23, 1); switch (Inst.getOpcode()) { case ARM::LDC_OFFSET: @@ -1426,14 +1439,14 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned reg = fieldFromInstruction32(Insn, 25, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); - unsigned W = fieldFromInstruction32(Insn, 21, 1); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned reg = fieldFromInstruction(Insn, 25, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); + unsigned W = fieldFromInstruction(Insn, 21, 1); // On stores, the writeback operand precedes Rt. switch (Inst.getOpcode()) { @@ -1476,7 +1489,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; ARM_AM::AddrOpc Op = ARM_AM::add; - if (!fieldFromInstruction32(Insn, 23, 1)) + if (!fieldFromInstruction(Insn, 23, 1)) Op = ARM_AM::sub; bool writeback = (P == 0) || (W == 1); @@ -1493,7 +1506,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; ARM_AM::ShiftOpc Opc = ARM_AM::lsl; - switch( fieldFromInstruction32(Insn, 5, 2)) { + switch( fieldFromInstruction(Insn, 5, 2)) { case 0: Opc = ARM_AM::lsl; break; @@ -1509,7 +1522,7 @@ DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; } - unsigned amt = fieldFromInstruction32(Insn, 7, 5); + unsigned amt = fieldFromInstruction(Insn, 7, 5); unsigned imm = ARM_AM::getAM2Opc(Op, amt, Opc, idx_mode); Inst.addOperand(MCOperand::CreateImm(imm)); @@ -1529,11 +1542,11 @@ static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 13, 4); - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned type = fieldFromInstruction32(Val, 5, 2); - unsigned imm = fieldFromInstruction32(Val, 7, 5); - unsigned U = fieldFromInstruction32(Val, 12, 1); + unsigned Rn = fieldFromInstruction(Val, 13, 4); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned type = fieldFromInstruction(Val, 5, 2); + unsigned imm = fieldFromInstruction(Val, 7, 5); + unsigned U = fieldFromInstruction(Val, 12, 1); ARM_AM::ShiftOpc ShOp = ARM_AM::lsl; switch (type) { @@ -1570,15 +1583,15 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned type = fieldFromInstruction32(Insn, 22, 1); - unsigned imm = fieldFromInstruction32(Insn, 8, 4); - unsigned U = ((~fieldFromInstruction32(Insn, 23, 1)) & 1) << 8; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned type = fieldFromInstruction(Insn, 22, 1); + unsigned imm = fieldFromInstruction(Insn, 8, 4); + unsigned U = ((~fieldFromInstruction(Insn, 23, 1)) & 1) << 8; + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned W = fieldFromInstruction(Insn, 21, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); unsigned Rt2 = Rt + 1; bool writeback = (W == 1) | (P == 0); @@ -1609,7 +1622,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, S = MCDisassembler::SoftFail; if (Rt2 == 15) S = MCDisassembler::SoftFail; - if (!type && fieldFromInstruction32(Insn, 8, 4)) + if (!type && fieldFromInstruction(Insn, 8, 4)) S = MCDisassembler::SoftFail; break; case ARM::STRH: @@ -1761,8 +1774,8 @@ static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned mode = fieldFromInstruction32(Insn, 23, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned mode = fieldFromInstruction(Insn, 23, 2); switch (mode) { case 0: @@ -1791,9 +1804,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned reglist = fieldFromInstruction32(Insn, 0, 16); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned reglist = fieldFromInstruction(Insn, 0, 16); if (pred == 0xF) { switch (Inst.getOpcode()) { @@ -1850,9 +1863,9 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, } // For stores (which become SRS's, the only operand is the mode. - if (fieldFromInstruction32(Insn, 20, 1) == 0) { + if (fieldFromInstruction(Insn, 20, 1) == 0) { Inst.addOperand( - MCOperand::CreateImm(fieldFromInstruction32(Insn, 0, 4))); + MCOperand::CreateImm(fieldFromInstruction(Insn, 0, 4))); return S; } @@ -1873,10 +1886,10 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst, static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned imod = fieldFromInstruction32(Insn, 18, 2); - unsigned M = fieldFromInstruction32(Insn, 17, 1); - unsigned iflags = fieldFromInstruction32(Insn, 6, 3); - unsigned mode = fieldFromInstruction32(Insn, 0, 5); + unsigned imod = fieldFromInstruction(Insn, 18, 2); + unsigned M = fieldFromInstruction(Insn, 17, 1); + unsigned iflags = fieldFromInstruction(Insn, 6, 3); + unsigned mode = fieldFromInstruction(Insn, 0, 5); DecodeStatus S = MCDisassembler::Success; @@ -1913,10 +1926,10 @@ static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned imod = fieldFromInstruction32(Insn, 9, 2); - unsigned M = fieldFromInstruction32(Insn, 8, 1); - unsigned iflags = fieldFromInstruction32(Insn, 5, 3); - unsigned mode = fieldFromInstruction32(Insn, 0, 5); + unsigned imod = fieldFromInstruction(Insn, 9, 2); + unsigned M = fieldFromInstruction(Insn, 8, 1); + unsigned iflags = fieldFromInstruction(Insn, 5, 3); + unsigned mode = fieldFromInstruction(Insn, 0, 5); DecodeStatus S = MCDisassembler::Success; @@ -1955,13 +1968,13 @@ static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 8, 4); + unsigned Rd = fieldFromInstruction(Insn, 8, 4); unsigned imm = 0; - imm |= (fieldFromInstruction32(Insn, 0, 8) << 0); - imm |= (fieldFromInstruction32(Insn, 12, 3) << 8); - imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); - imm |= (fieldFromInstruction32(Insn, 26, 1) << 11); + imm |= (fieldFromInstruction(Insn, 0, 8) << 0); + imm |= (fieldFromInstruction(Insn, 12, 3) << 8); + imm |= (fieldFromInstruction(Insn, 16, 4) << 12); + imm |= (fieldFromInstruction(Insn, 26, 1) << 11); if (Inst.getOpcode() == ARM::t2MOVTi16) if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -1979,12 +1992,12 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); unsigned imm = 0; - imm |= (fieldFromInstruction32(Insn, 0, 12) << 0); - imm |= (fieldFromInstruction32(Insn, 16, 4) << 12); + imm |= (fieldFromInstruction(Insn, 0, 12) << 0); + imm |= (fieldFromInstruction(Insn, 16, 4) << 12); if (Inst.getOpcode() == ARM::MOVTi16) if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -2005,11 +2018,11 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 16, 4); - unsigned Rn = fieldFromInstruction32(Insn, 0, 4); - unsigned Rm = fieldFromInstruction32(Insn, 8, 4); - unsigned Ra = fieldFromInstruction32(Insn, 12, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rd = fieldFromInstruction(Insn, 16, 4); + unsigned Rn = fieldFromInstruction(Insn, 0, 4); + unsigned Rm = fieldFromInstruction(Insn, 8, 4); + unsigned Ra = fieldFromInstruction(Insn, 12, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (pred == 0xF) return DecodeCPSInstruction(Inst, Insn, Address, Decoder); @@ -2033,9 +2046,9 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned add = fieldFromInstruction32(Val, 12, 1); - unsigned imm = fieldFromInstruction32(Val, 0, 12); - unsigned Rn = fieldFromInstruction32(Val, 13, 4); + unsigned add = fieldFromInstruction(Val, 12, 1); + unsigned imm = fieldFromInstruction(Val, 0, 12); + unsigned Rn = fieldFromInstruction(Val, 13, 4); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2053,9 +2066,9 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 9, 4); - unsigned U = fieldFromInstruction32(Val, 8, 1); - unsigned imm = fieldFromInstruction32(Val, 0, 8); + unsigned Rn = fieldFromInstruction(Val, 9, 4); + unsigned U = fieldFromInstruction(Val, 8, 1); + unsigned imm = fieldFromInstruction(Val, 0, 8); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -2077,11 +2090,11 @@ static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) | - (fieldFromInstruction32(Insn, 11, 1) << 18) | - (fieldFromInstruction32(Insn, 13, 1) << 17) | - (fieldFromInstruction32(Insn, 16, 6) << 11) | - (fieldFromInstruction32(Insn, 26, 1) << 19); + unsigned imm = (fieldFromInstruction(Insn, 0, 11) << 0) | + (fieldFromInstruction(Insn, 11, 1) << 18) | + (fieldFromInstruction(Insn, 13, 1) << 17) | + (fieldFromInstruction(Insn, 16, 6) << 11) | + (fieldFromInstruction(Insn, 26, 1) << 19); if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<20>(imm<<1) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<20>(imm << 1))); @@ -2093,12 +2106,12 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 24) << 2; + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 24) << 2; if (pred == 0xF) { Inst.setOpcode(ARM::BLXi); - imm |= fieldFromInstruction32(Insn, 24, 1) << 1; + imm |= fieldFromInstruction(Insn, 24, 1) << 1; if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm))); @@ -2119,8 +2132,8 @@ static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - unsigned align = fieldFromInstruction32(Val, 4, 2); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + unsigned align = fieldFromInstruction(Val, 4, 2); if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -2136,12 +2149,12 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned wb = fieldFromInstruction32(Insn, 16, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction(Insn, 16, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + Rn |= fieldFromInstruction(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction(Insn, 0, 4); // First output register switch (Inst.getOpcode()) { @@ -2410,12 +2423,12 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned wb = fieldFromInstruction32(Insn, 16, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - Rn |= fieldFromInstruction32(Insn, 4, 2) << 4; - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned wb = fieldFromInstruction(Insn, 16, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + Rn |= fieldFromInstruction(Insn, 4, 2) << 4; + unsigned Rm = fieldFromInstruction(Insn, 0, 4); // Writeback Operand switch (Inst.getOpcode()) { @@ -2681,12 +2694,12 @@ static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned align = fieldFromInstruction32(Insn, 4, 1); - unsigned size = fieldFromInstruction32(Insn, 6, 2); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned align = fieldFromInstruction(Insn, 4, 1); + unsigned size = fieldFromInstruction(Insn, 6, 2); align *= (1 << size); @@ -2726,12 +2739,12 @@ static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned align = fieldFromInstruction32(Insn, 4, 1); - unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned align = fieldFromInstruction(Insn, 4, 1); + unsigned size = 1 << fieldFromInstruction(Insn, 6, 2); align *= 2*size; switch (Inst.getOpcode()) { @@ -2774,11 +2787,11 @@ static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1; if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2809,13 +2822,13 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned size = fieldFromInstruction32(Insn, 6, 2); - unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1; - unsigned align = fieldFromInstruction32(Insn, 4, 1); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned size = fieldFromInstruction(Insn, 6, 2); + unsigned inc = fieldFromInstruction(Insn, 5, 1) + 1; + unsigned align = fieldFromInstruction(Insn, 4, 1); if (size == 0x3) { size = 4; @@ -2862,14 +2875,14 @@ DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned imm = fieldFromInstruction32(Insn, 0, 4); - imm |= fieldFromInstruction32(Insn, 16, 3) << 4; - imm |= fieldFromInstruction32(Insn, 24, 1) << 7; - imm |= fieldFromInstruction32(Insn, 8, 4) << 8; - imm |= fieldFromInstruction32(Insn, 5, 1) << 12; - unsigned Q = fieldFromInstruction32(Insn, 6, 1); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned imm = fieldFromInstruction(Insn, 0, 4); + imm |= fieldFromInstruction(Insn, 16, 3) << 4; + imm |= fieldFromInstruction(Insn, 24, 1) << 7; + imm |= fieldFromInstruction(Insn, 8, 4) << 8; + imm |= fieldFromInstruction(Insn, 5, 1) << 12; + unsigned Q = fieldFromInstruction(Insn, 6, 1); if (Q) { if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) @@ -2907,11 +2920,11 @@ static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 18, 2); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + Rm |= fieldFromInstruction(Insn, 5, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 18, 2); if (!Check(S, DecodeQPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2950,13 +2963,13 @@ static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - Rn |= fieldFromInstruction32(Insn, 7, 1) << 4; - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - Rm |= fieldFromInstruction32(Insn, 5, 1) << 4; - unsigned op = fieldFromInstruction32(Insn, 6, 1); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + Rn |= fieldFromInstruction(Insn, 7, 1) << 4; + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + Rm |= fieldFromInstruction(Insn, 5, 1) << 4; + unsigned op = fieldFromInstruction(Insn, 6, 1); if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -2986,8 +2999,8 @@ static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned dst = fieldFromInstruction16(Insn, 8, 3); - unsigned imm = fieldFromInstruction16(Insn, 0, 8); + unsigned dst = fieldFromInstruction(Insn, 8, 3); + unsigned imm = fieldFromInstruction(Insn, 0, 8); if (!Check(S, DecodetGPRRegisterClass(Inst, dst, Address, Decoder))) return MCDisassembler::Fail; @@ -3034,8 +3047,8 @@ static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 0, 3); - unsigned Rm = fieldFromInstruction32(Val, 3, 3); + unsigned Rn = fieldFromInstruction(Val, 0, 3); + unsigned Rm = fieldFromInstruction(Val, 3, 3); if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3049,8 +3062,8 @@ static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 0, 3); - unsigned imm = fieldFromInstruction32(Val, 3, 5); + unsigned Rn = fieldFromInstruction(Val, 0, 3); + unsigned imm = fieldFromInstruction(Val, 3, 5); if (!Check(S, DecodetGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3081,9 +3094,9 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 6, 4); - unsigned Rm = fieldFromInstruction32(Val, 2, 4); - unsigned imm = fieldFromInstruction32(Val, 0, 2); + unsigned Rn = fieldFromInstruction(Val, 6, 4); + unsigned Rm = fieldFromInstruction(Val, 2, 4); + unsigned imm = fieldFromInstruction(Val, 0, 2); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3104,13 +3117,13 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, case ARM::t2PLIs: break; default: { - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; } } - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); if (Rn == 0xF) { switch (Inst.getOpcode()) { case ARM::t2LDRBs: @@ -3133,16 +3146,16 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } - int imm = fieldFromInstruction32(Insn, 0, 12); - if (!fieldFromInstruction32(Insn, 23, 1)) imm *= -1; + int imm = fieldFromInstruction(Insn, 0, 12); + if (!fieldFromInstruction(Insn, 23, 1)) imm *= -1; Inst.addOperand(MCOperand::CreateImm(imm)); return S; } - unsigned addrmode = fieldFromInstruction32(Insn, 4, 2); - addrmode |= fieldFromInstruction32(Insn, 0, 4) << 2; - addrmode |= fieldFromInstruction32(Insn, 16, 4) << 6; + unsigned addrmode = fieldFromInstruction(Insn, 4, 2); + addrmode |= fieldFromInstruction(Insn, 0, 4) << 2; + addrmode |= fieldFromInstruction(Insn, 16, 4) << 6; if (!Check(S, DecodeT2AddrModeSOReg(Inst, addrmode, Address, Decoder))) return MCDisassembler::Fail; @@ -3151,9 +3164,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - int imm = Val & 0xFF; - if (!(Val & 0x100)) imm *= -1; - Inst.addOperand(MCOperand::CreateImm(imm << 2)); + if (Val == 0) + Inst.addOperand(MCOperand::CreateImm(INT32_MIN)); + else { + int imm = Val & 0xFF; + + if (!(Val & 0x100)) imm *= -1; + Inst.addOperand(MCOperand::CreateImm(imm << 2)); + } return MCDisassembler::Success; } @@ -3162,8 +3180,8 @@ static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 9, 4); - unsigned imm = fieldFromInstruction32(Val, 0, 9); + unsigned Rn = fieldFromInstruction(Val, 9, 4); + unsigned imm = fieldFromInstruction(Val, 0, 9); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3177,8 +3195,8 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 8, 4); - unsigned imm = fieldFromInstruction32(Val, 0, 8); + unsigned Rn = fieldFromInstruction(Val, 8, 4); + unsigned imm = fieldFromInstruction(Val, 0, 8); if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3205,8 +3223,8 @@ static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 9, 4); - unsigned imm = fieldFromInstruction32(Val, 0, 9); + unsigned Rn = fieldFromInstruction(Val, 9, 4); + unsigned imm = fieldFromInstruction(Val, 0, 9); // Some instructions always use an additive offset. switch (Inst.getOpcode()) { @@ -3236,12 +3254,12 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned addr = fieldFromInstruction32(Insn, 0, 8); - addr |= fieldFromInstruction32(Insn, 9, 1) << 8; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned addr = fieldFromInstruction(Insn, 0, 8); + addr |= fieldFromInstruction(Insn, 9, 1) << 8; addr |= Rn << 9; - unsigned load = fieldFromInstruction32(Insn, 20, 1); + unsigned load = fieldFromInstruction(Insn, 20, 1); if (!load) { if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) @@ -3266,8 +3284,8 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 13, 4); - unsigned imm = fieldFromInstruction32(Val, 0, 12); + unsigned Rn = fieldFromInstruction(Val, 13, 4); + unsigned imm = fieldFromInstruction(Val, 0, 12); if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) return MCDisassembler::Fail; @@ -3279,7 +3297,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { - unsigned imm = fieldFromInstruction16(Insn, 0, 7); + unsigned imm = fieldFromInstruction(Insn, 0, 7); Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateReg(ARM::SP)); @@ -3293,8 +3311,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, DecodeStatus S = MCDisassembler::Success; if (Inst.getOpcode() == ARM::tADDrSP) { - unsigned Rdm = fieldFromInstruction16(Insn, 0, 3); - Rdm |= fieldFromInstruction16(Insn, 7, 1) << 3; + unsigned Rdm = fieldFromInstruction(Insn, 0, 3); + Rdm |= fieldFromInstruction(Insn, 7, 1) << 3; if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) return MCDisassembler::Fail; @@ -3302,7 +3320,7 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, if (!Check(S, DecodeGPRRegisterClass(Inst, Rdm, Address, Decoder))) return MCDisassembler::Fail; } else if (Inst.getOpcode() == ARM::tADDspr) { - unsigned Rm = fieldFromInstruction16(Insn, 3, 4); + unsigned Rm = fieldFromInstruction(Insn, 3, 4); Inst.addOperand(MCOperand::CreateReg(ARM::SP)); Inst.addOperand(MCOperand::CreateReg(ARM::SP)); @@ -3315,8 +3333,8 @@ static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn, static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { - unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2; - unsigned flags = fieldFromInstruction16(Insn, 0, 3); + unsigned imod = fieldFromInstruction(Insn, 4, 1) | 0x2; + unsigned flags = fieldFromInstruction(Insn, 0, 3); Inst.addOperand(MCOperand::CreateImm(imod)); Inst.addOperand(MCOperand::CreateImm(flags)); @@ -3327,8 +3345,8 @@ static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn, static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned add = fieldFromInstruction32(Insn, 4, 1); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned add = fieldFromInstruction(Insn, 4, 1); if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; @@ -3375,8 +3393,8 @@ DecodeThumbTableBranch(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); if (Rn == ARM::SP) S = MCDisassembler::SoftFail; if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) @@ -3391,9 +3409,9 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned pred = fieldFromInstruction32(Insn, 22, 4); + unsigned pred = fieldFromInstruction(Insn, 22, 4); if (pred == 0xE || pred == 0xF) { - unsigned opc = fieldFromInstruction32(Insn, 4, 28); + unsigned opc = fieldFromInstruction(Insn, 4, 28); switch (opc) { default: return MCDisassembler::Fail; @@ -3408,15 +3426,15 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, break; } - unsigned imm = fieldFromInstruction32(Insn, 0, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 4); return DecodeMemBarrierOption(Inst, imm, Address, Decoder); } - unsigned brtarget = fieldFromInstruction32(Insn, 0, 11) << 1; - brtarget |= fieldFromInstruction32(Insn, 11, 1) << 19; - brtarget |= fieldFromInstruction32(Insn, 13, 1) << 18; - brtarget |= fieldFromInstruction32(Insn, 16, 6) << 12; - brtarget |= fieldFromInstruction32(Insn, 26, 1) << 20; + unsigned brtarget = fieldFromInstruction(Insn, 0, 11) << 1; + brtarget |= fieldFromInstruction(Insn, 11, 1) << 19; + brtarget |= fieldFromInstruction(Insn, 13, 1) << 18; + brtarget |= fieldFromInstruction(Insn, 16, 6) << 12; + brtarget |= fieldFromInstruction(Insn, 26, 1) << 20; if (!Check(S, DecodeT2BROperand(Inst, brtarget, Address, Decoder))) return MCDisassembler::Fail; @@ -3431,10 +3449,10 @@ DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn, // a splat operation or a rotation. static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - unsigned ctrl = fieldFromInstruction32(Val, 10, 2); + unsigned ctrl = fieldFromInstruction(Val, 10, 2); if (ctrl == 0) { - unsigned byte = fieldFromInstruction32(Val, 8, 2); - unsigned imm = fieldFromInstruction32(Val, 0, 8); + unsigned byte = fieldFromInstruction(Val, 8, 2); + unsigned imm = fieldFromInstruction(Val, 0, 8); switch (byte) { case 0: Inst.addOperand(MCOperand::CreateImm(imm)); @@ -3451,8 +3469,8 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, break; } } else { - unsigned unrot = fieldFromInstruction32(Val, 0, 7) | 0x80; - unsigned rot = fieldFromInstruction32(Val, 7, 5); + unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80; + unsigned rot = fieldFromInstruction(Val, 7, 5); unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31)); Inst.addOperand(MCOperand::CreateImm(imm)); } @@ -3494,19 +3512,8 @@ static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - switch (Val) { - default: + if (Val & ~0xf) return MCDisassembler::Fail; - case 0xF: // SY - case 0xE: // ST - case 0xB: // ISH - case 0xA: // ISHST - case 0x7: // NSH - case 0x6: // NSHST - case 0x3: // OSH - case 0x2: // OSHST - break; - } Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; @@ -3523,9 +3530,9 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail; @@ -3546,10 +3553,10 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder){ DecodeStatus S = MCDisassembler::Success; - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt = fieldFromInstruction32(Insn, 0, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + unsigned Rt = fieldFromInstruction(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; @@ -3573,12 +3580,12 @@ static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; @@ -3598,13 +3605,13 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; if (Rm == 0xF) S = MCDisassembler::SoftFail; @@ -3626,12 +3633,12 @@ static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; @@ -3651,12 +3658,12 @@ static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned imm = fieldFromInstruction32(Insn, 0, 12); - imm |= fieldFromInstruction32(Insn, 16, 4) << 13; - imm |= fieldFromInstruction32(Insn, 23, 1) << 12; - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned imm = fieldFromInstruction(Insn, 0, 12); + imm |= fieldFromInstruction(Insn, 16, 4) << 13; + imm |= fieldFromInstruction(Insn, 23, 1) << 12; + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (Rn == 0xF || Rn == Rt) S = MCDisassembler::SoftFail; @@ -3676,11 +3683,11 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3688,22 +3695,22 @@ static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 2: - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 2) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 2) != 0) align = 4; } @@ -3735,11 +3742,11 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3747,22 +3754,22 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 2: - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 2) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 2) != 0) align = 4; } @@ -3793,11 +3800,11 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3806,24 +3813,24 @@ static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - index = fieldFromInstruction32(Insn, 5, 3); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 5, 3); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 1: - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 1) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 1) != 0) align = 8; - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3860,11 +3867,11 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3873,24 +3880,24 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - index = fieldFromInstruction32(Insn, 5, 3); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 5, 3); + if (fieldFromInstruction(Insn, 4, 1)) align = 2; break; case 1: - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 4, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 5, 1)) + if (fieldFromInstruction(Insn, 5, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 4, 1) != 0) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 4, 1) != 0) align = 8; - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3924,11 +3931,11 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -3937,22 +3944,22 @@ static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) + if (fieldFromInstruction(Insn, 4, 2)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -3994,11 +4001,11 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4007,22 +4014,22 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) + if (fieldFromInstruction(Insn, 4, 2)) return MCDisassembler::Fail; // UNDEFINED - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4058,11 +4065,11 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4071,22 +4078,22 @@ static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 8; - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) - align = 4 << fieldFromInstruction32(Insn, 4, 2); - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 4, 2)) + align = 4 << fieldFromInstruction(Insn, 4, 2); + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4132,11 +4139,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 0, 4); - unsigned Rd = fieldFromInstruction32(Insn, 12, 4); - Rd |= fieldFromInstruction32(Insn, 22, 1) << 4; - unsigned size = fieldFromInstruction32(Insn, 10, 2); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 0, 4); + unsigned Rd = fieldFromInstruction(Insn, 12, 4); + Rd |= fieldFromInstruction(Insn, 22, 1) << 4; + unsigned size = fieldFromInstruction(Insn, 10, 2); unsigned align = 0; unsigned index = 0; @@ -4145,22 +4152,22 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, default: return MCDisassembler::Fail; case 0: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 4; - index = fieldFromInstruction32(Insn, 5, 3); + index = fieldFromInstruction(Insn, 5, 3); break; case 1: - if (fieldFromInstruction32(Insn, 4, 1)) + if (fieldFromInstruction(Insn, 4, 1)) align = 8; - index = fieldFromInstruction32(Insn, 6, 2); - if (fieldFromInstruction32(Insn, 5, 1)) + index = fieldFromInstruction(Insn, 6, 2); + if (fieldFromInstruction(Insn, 5, 1)) inc = 2; break; case 2: - if (fieldFromInstruction32(Insn, 4, 2)) - align = 4 << fieldFromInstruction32(Insn, 4, 2); - index = fieldFromInstruction32(Insn, 7, 1); - if (fieldFromInstruction32(Insn, 6, 1)) + if (fieldFromInstruction(Insn, 4, 2)) + align = 4 << fieldFromInstruction(Insn, 4, 2); + index = fieldFromInstruction(Insn, 7, 1); + if (fieldFromInstruction(Insn, 6, 1)) inc = 2; break; } @@ -4196,11 +4203,11 @@ static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 5, 1); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 5, 1); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + Rm |= fieldFromInstruction(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4222,11 +4229,11 @@ static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 16, 4); - unsigned Rm = fieldFromInstruction32(Insn, 5, 1); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); - Rm |= fieldFromInstruction32(Insn, 0, 4) << 1; + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 16, 4); + unsigned Rm = fieldFromInstruction(Insn, 5, 1); + unsigned pred = fieldFromInstruction(Insn, 28, 4); + Rm |= fieldFromInstruction(Insn, 0, 4) << 1; if (Rt == 0xF || Rt2 == 0xF || Rm == 0x1F) S = MCDisassembler::SoftFail; @@ -4248,8 +4255,8 @@ static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned pred = fieldFromInstruction16(Insn, 4, 4); - unsigned mask = fieldFromInstruction16(Insn, 0, 4); + unsigned pred = fieldFromInstruction(Insn, 4, 4); + unsigned mask = fieldFromInstruction(Insn, 0, 4); if (pred == 0xF) { pred = 0xE; @@ -4271,13 +4278,13 @@ DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned addr = fieldFromInstruction32(Insn, 0, 8); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - unsigned U = fieldFromInstruction32(Insn, 23, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 8, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned addr = fieldFromInstruction(Insn, 0, 8); + unsigned W = fieldFromInstruction(Insn, 21, 1); + unsigned U = fieldFromInstruction(Insn, 23, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); bool writeback = (W == 1) | (P == 0); addr |= (U << 8) | (Rn << 9); @@ -4308,13 +4315,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 8, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned addr = fieldFromInstruction32(Insn, 0, 8); - unsigned W = fieldFromInstruction32(Insn, 21, 1); - unsigned U = fieldFromInstruction32(Insn, 23, 1); - unsigned P = fieldFromInstruction32(Insn, 24, 1); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 8, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned addr = fieldFromInstruction(Insn, 0, 8); + unsigned W = fieldFromInstruction(Insn, 21, 1); + unsigned U = fieldFromInstruction(Insn, 23, 1); + unsigned P = fieldFromInstruction(Insn, 24, 1); bool writeback = (W == 1) | (P == 0); addr |= (U << 8) | (Rn << 9); @@ -4340,13 +4347,13 @@ DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, uint64_t Address, const void *Decoder) { - unsigned sign1 = fieldFromInstruction32(Insn, 21, 1); - unsigned sign2 = fieldFromInstruction32(Insn, 23, 1); + unsigned sign1 = fieldFromInstruction(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; - unsigned Val = fieldFromInstruction32(Insn, 0, 8); - Val |= fieldFromInstruction32(Insn, 12, 3) << 8; - Val |= fieldFromInstruction32(Insn, 26, 1) << 11; + unsigned Val = fieldFromInstruction(Insn, 0, 8); + Val |= fieldFromInstruction(Insn, 12, 3) << 8; + Val |= fieldFromInstruction(Insn, 26, 1) << 11; Val |= sign1 << 12; Inst.addOperand(MCOperand::CreateImm(SignExtend32<13>(Val))); @@ -4366,10 +4373,10 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Rt = fieldFromInstruction32(Insn, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); - unsigned Rn = fieldFromInstruction32(Insn, 16, 4); - unsigned pred = fieldFromInstruction32(Insn, 28, 4); + unsigned Rt = fieldFromInstruction(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction(Insn, 0, 4); + unsigned Rn = fieldFromInstruction(Insn, 16, 4); + unsigned pred = fieldFromInstruction(Insn, 28, 4); if (pred == 0xF) return DecodeCPSInstruction(Inst, Insn, Address, Decoder); @@ -4393,12 +4400,12 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); - Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); - unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); - Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); - unsigned imm = fieldFromInstruction32(Insn, 16, 6); - unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction(Insn, 16, 6); + unsigned cmode = fieldFromInstruction(Insn, 8, 4); DecodeStatus S = MCDisassembler::Success; @@ -4421,12 +4428,12 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0); - Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4); - unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0); - Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4); - unsigned imm = fieldFromInstruction32(Insn, 16, 6); - unsigned cmode = fieldFromInstruction32(Insn, 8, 4); + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned imm = fieldFromInstruction(Insn, 16, 6); + unsigned cmode = fieldFromInstruction(Insn, 8, 4); DecodeStatus S = MCDisassembler::Success; @@ -4451,13 +4458,13 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - unsigned Rn = fieldFromInstruction32(Val, 16, 4); - unsigned Rt = fieldFromInstruction32(Val, 12, 4); - unsigned Rm = fieldFromInstruction32(Val, 0, 4); - Rm |= (fieldFromInstruction32(Val, 23, 1) << 4); - unsigned Cond = fieldFromInstruction32(Val, 28, 4); + unsigned Rn = fieldFromInstruction(Val, 16, 4); + unsigned Rt = fieldFromInstruction(Val, 12, 4); + unsigned Rm = fieldFromInstruction(Val, 0, 4); + Rm |= (fieldFromInstruction(Val, 23, 1) << 4); + unsigned Cond = fieldFromInstruction(Val, 28, 4); - if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt) + if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt) S = MCDisassembler::SoftFail; if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) @@ -4479,11 +4486,11 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; - unsigned CRm = fieldFromInstruction32(Val, 0, 4); - unsigned opc1 = fieldFromInstruction32(Val, 4, 4); - unsigned cop = fieldFromInstruction32(Val, 8, 4); - unsigned Rt = fieldFromInstruction32(Val, 12, 4); - unsigned Rt2 = fieldFromInstruction32(Val, 16, 4); + unsigned CRm = fieldFromInstruction(Val, 0, 4); + unsigned opc1 = fieldFromInstruction(Val, 4, 4); + unsigned cop = fieldFromInstruction(Val, 8, 4); + unsigned Rt = fieldFromInstruction(Val, 12, 4); + unsigned Rt2 = fieldFromInstruction(Val, 16, 4); if ((cop & ~0x1) == 0xa) return MCDisassembler::Fail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 2f6b1b0..8b9109e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -792,6 +792,25 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum, llvm_unreachable("Unhandled PC-relative pseudo-instruction!"); } +void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO = MI->getOperand(OpNum); + + if (MO.isExpr()) { + O << *MO.getExpr(); + return; + } + + int32_t OffImm = (int32_t)MO.getImm(); + + if (OffImm == INT32_MIN) + O << "#-0"; + else if (OffImm < 0) + O << "#-" << -OffImm; + else + O << "#" << OffImm; +} + void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "#" << MI->getOperand(OpNum).getImm() * 4; @@ -953,12 +972,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, O << "[" << getRegisterName(MO1.getReg()); - int32_t OffImm = (int32_t)MO2.getImm() / 4; + int32_t OffImm = (int32_t)MO2.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm < 0) - O << ", #-" << -OffImm * 4; + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #" << OffImm * 4; + O << ", #" << OffImm; O << "]"; } @@ -990,15 +1014,17 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(OpNum); - int32_t OffImm = (int32_t)MO1.getImm() / 4; + int32_t OffImm = (int32_t)MO1.getImm(); + + assert(((OffImm & 0x3) == 0) && "Not a valid immediate!"); + // Don't print +0. - if (OffImm != 0) { - O << ", "; - if (OffImm < 0) - O << "#-" << -OffImm * 4; - else if (OffImm > 0) - O << "#" << OffImm * 4; - } + if (OffImm == INT32_MIN) + O << ", #-0"; + else if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; } void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 8acb7ee..73d7bfd 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -73,6 +73,7 @@ public: void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index ae11be8..de48a0e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -120,14 +120,22 @@ namespace ARM_MB { // The Memory Barrier Option constants map directly to the 4-bit encoding of // the option field for memory barrier operations. enum MemBOpt { - SY = 15, - ST = 14, - ISH = 11, - ISHST = 10, - NSH = 7, - NSHST = 6, + RESERVED_0 = 0, + RESERVED_1 = 1, + OSHST = 2, OSH = 3, - OSHST = 2 + RESERVED_4 = 4, + RESERVED_5 = 5, + NSHST = 6, + NSH = 7, + RESERVED_8 = 8, + RESERVED_9 = 9, + ISHST = 10, + ISH = 11, + RESERVED_12 = 12, + RESERVED_13 = 13, + ST = 14, + SY = 15 }; inline static const char *MemBOptToString(unsigned val) { @@ -135,92 +143,24 @@ namespace ARM_MB { default: llvm_unreachable("Unknown memory operation"); case SY: return "sy"; case ST: return "st"; + case RESERVED_13: return "#0xd"; + case RESERVED_12: return "#0xc"; case ISH: return "ish"; case ISHST: return "ishst"; + case RESERVED_9: return "#0x9"; + case RESERVED_8: return "#0x8"; case NSH: return "nsh"; case NSHST: return "nshst"; + case RESERVED_5: return "#0x5"; + case RESERVED_4: return "#0x4"; case OSH: return "osh"; case OSHST: return "oshst"; + case RESERVED_1: return "#0x1"; + case RESERVED_0: return "#0x0"; } } } // namespace ARM_MB -/// getARMRegisterNumbering - Given the enum value for some register, e.g. -/// ARM::LR, return the number that it corresponds to (e.g. 14). -inline static unsigned getARMRegisterNumbering(unsigned Reg) { - using namespace ARM; - switch (Reg) { - default: - llvm_unreachable("Unknown ARM register!"); - case R0: case S0: case D0: case Q0: return 0; - case R1: case S1: case D1: case Q1: return 1; - case R2: case S2: case D2: case Q2: return 2; - case R3: case S3: case D3: case Q3: return 3; - case R4: case S4: case D4: case Q4: return 4; - case R5: case S5: case D5: case Q5: return 5; - case R6: case S6: case D6: case Q6: return 6; - case R7: case S7: case D7: case Q7: return 7; - case R8: case S8: case D8: case Q8: return 8; - case R9: case S9: case D9: case Q9: return 9; - case R10: case S10: case D10: case Q10: return 10; - case R11: case S11: case D11: case Q11: return 11; - case R12: case S12: case D12: case Q12: return 12; - case SP: case S13: case D13: case Q13: return 13; - case LR: case S14: case D14: case Q14: return 14; - case PC: case S15: case D15: case Q15: return 15; - - case S16: case D16: return 16; - case S17: case D17: return 17; - case S18: case D18: return 18; - case S19: case D19: return 19; - case S20: case D20: return 20; - case S21: case D21: return 21; - case S22: case D22: return 22; - case S23: case D23: return 23; - case S24: case D24: return 24; - case S25: case D25: return 25; - case S26: case D26: return 26; - case S27: case D27: return 27; - case S28: case D28: return 28; - case S29: case D29: return 29; - case S30: case D30: return 30; - case S31: case D31: return 31; - - // Composite registers use the regnum of the first register in the list. - /* Q0 */ case D0_D2: return 0; - case D1_D2: case D1_D3: return 1; - /* Q1 */ case D2_D4: return 2; - case D3_D4: case D3_D5: return 3; - /* Q2 */ case D4_D6: return 4; - case D5_D6: case D5_D7: return 5; - /* Q3 */ case D6_D8: return 6; - case D7_D8: case D7_D9: return 7; - /* Q4 */ case D8_D10: return 8; - case D9_D10: case D9_D11: return 9; - /* Q5 */ case D10_D12: return 10; - case D11_D12: case D11_D13: return 11; - /* Q6 */ case D12_D14: return 12; - case D13_D14: case D13_D15: return 13; - /* Q7 */ case D14_D16: return 14; - case D15_D16: case D15_D17: return 15; - /* Q8 */ case D16_D18: return 16; - case D17_D18: case D17_D19: return 17; - /* Q9 */ case D18_D20: return 18; - case D19_D20: case D19_D21: return 19; - /* Q10 */ case D20_D22: return 20; - case D21_D22: case D21_D23: return 21; - /* Q11 */ case D22_D24: return 22; - case D23_D24: case D23_D25: return 23; - /* Q12 */ case D24_D26: return 24; - case D25_D26: case D25_D27: return 25; - /* Q13 */ case D26_D28: return 26; - case D27_D28: case D27_D29: return 27; - /* Q14 */ case D28_D30: return 28; - case D29_D30: case D29_D31: return 29; - /* Q15 */ - } -} - /// isARMLowRegister - Returns true if the register is a low register (r0-r7). /// static inline bool isARMLowRegister(unsigned Reg) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 1964bcd..94f1082 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -18,6 +18,7 @@ #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -38,11 +39,12 @@ class ARMMCCodeEmitter : public MCCodeEmitter { void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + const MCContext &CTX; public: ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), STI(sti) { + : MCII(mcii), STI(sti), CTX(ctx) { } ~ARMMCCodeEmitter() {} @@ -405,7 +407,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups) const { if (MO.isReg()) { unsigned Reg = MO.getReg(); - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); // Q registers are encoded as 2x their register number. switch (Reg) { @@ -434,7 +436,7 @@ EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - Reg = getARMRegisterNumbering(MO.getReg()); + Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); int32_t SImm = MO1.getImm(); bool isAdd = true; @@ -641,8 +643,8 @@ getUnconditionalBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label -/// target. +/// getAdrLabelOpValue - Return encoding info for 12-bit shifted-immediate +/// ADR label target. uint32_t ARMMCCodeEmitter:: getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups) const { @@ -652,15 +654,23 @@ getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, Fixups); int32_t offset = MO.getImm(); uint32_t Val = 0x2000; - if (offset < 0) { + + if (offset == INT32_MIN) { + Val = 0x1000; + offset = 0; + } else if (offset < 0) { Val = 0x1000; offset *= -1; } - Val |= offset; + + int SoImmVal = ARM_AM::getSOImmVal(offset); + assert(SoImmVal != -1 && "Not a valid so_imm value!"); + + Val |= SoImmVal; return Val; } -/// getAdrLabelOpValue - Return encoding info for 12-bit immediate ADR label +/// getT2AdrLabelOpValue - Return encoding info for 12-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -670,14 +680,16 @@ getT2AdrLabelOpValue(const MCInst &MI, unsigned OpIdx, return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_adr_pcrel_12, Fixups); int32_t Val = MO.getImm(); - if (Val < 0) { + if (Val == INT32_MIN) + Val = 0x1000; + else if (Val < 0) { Val *= -1; Val |= 0x1000; } return Val; } -/// getAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label +/// getThumbAdrLabelOpValue - Return encoding info for 8-bit immediate ADR label /// target. uint32_t ARMMCCodeEmitter:: getThumbAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, @@ -699,8 +711,8 @@ getThumbAddrModeRegRegOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO1 = MI.getOperand(OpIdx); const MCOperand &MO2 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO1.getReg()); - unsigned Rm = getARMRegisterNumbering(MO2.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); return (Rm << 3) | Rn; } @@ -716,7 +728,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm12 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -796,7 +808,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false ; // 'U' bit is set as part of the fixup. @@ -832,7 +844,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx, // {7-0} = imm8 const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Reg = getARMRegisterNumbering(MO.getReg()); + unsigned Reg = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm8 = MO1.getImm(); return (Reg << 8) | Imm8; } @@ -915,8 +927,8 @@ getLdStSORegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); const MCOperand &MO2 = MI.getOperand(OpIdx+2); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); - unsigned Rm = getARMRegisterNumbering(MO1.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); + unsigned Rm = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm()); bool isAdd = ARM_AM::getAM2Op(MO2.getImm()) == ARM_AM::add; ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(MO2.getImm()); @@ -946,7 +958,7 @@ getAddrMode2OpValue(const MCInst &MI, unsigned OpIdx, // {12} isAdd // {11-0} imm12/Rm const MCOperand &MO = MI.getOperand(OpIdx); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); uint32_t Binary = getAddrMode2OffsetOpValue(MI, OpIdx + 1, Fixups); Binary |= Rn << 14; return Binary; @@ -969,7 +981,7 @@ getAddrMode2OffsetOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc ShOp = ARM_AM::getAM2ShiftOpc(Imm); Binary <<= 7; // Shift amount is bits [11:7] Binary |= getShiftOp(ShOp) << 5; // Shift type is bits [6:5] - Binary |= getARMRegisterNumbering(MO.getReg()); // Rm is bits [3:0] + Binary |= CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Rm is bits [3:0] } return Binary | (isAdd << 12) | (isReg << 13); } @@ -982,7 +994,7 @@ getPostIdxRegOpValue(const MCInst &MI, unsigned OpIdx, const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx+1); bool isAdd = MO1.getImm() != 0; - return getARMRegisterNumbering(MO.getReg()) | (isAdd << 4); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()) | (isAdd << 4); } uint32_t ARMMCCodeEmitter:: @@ -1000,7 +1012,7 @@ getAddrMode3OffsetOpValue(const MCInst &MI, unsigned OpIdx, uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); return Imm8 | (isAdd << 8) | (isImm << 9); } @@ -1018,7 +1030,7 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. if (!MO.isReg()) { - unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC. + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. assert(MO.isExpr() && "Unexpected machine operand type!"); const MCExpr *Expr = MO.getExpr(); @@ -1028,14 +1040,14 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx, ++MCNumCPRelocations; return (Rn << 9) | (1 << 13); } - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm = MO2.getImm(); bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add; bool isImm = MO1.getReg() == 0; uint32_t Imm8 = ARM_AM::getAM3Offset(Imm); // if reg +/- reg, Rm will be non-zero. Otherwise, we have reg +/- imm8 if (!isImm) - Imm8 = getARMRegisterNumbering(MO1.getReg()); + Imm8 = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); return (Rn << 9) | Imm8 | (isAdd << 8) | (isImm << 13); } @@ -1063,7 +1075,7 @@ getAddrModeISOpValue(const MCInst &MI, unsigned OpIdx, // {2-0} = Rn const MCOperand &MO = MI.getOperand(OpIdx); const MCOperand &MO1 = MI.getOperand(OpIdx + 1); - unsigned Rn = getARMRegisterNumbering(MO.getReg()); + unsigned Rn = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); unsigned Imm5 = MO1.getImm(); return ((Imm5 & 0x1f) << 3) | Rn; } @@ -1090,7 +1102,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, // If The first operand isn't a register, we have a label reference. const MCOperand &MO = MI.getOperand(OpIdx); if (!MO.isReg()) { - Reg = getARMRegisterNumbering(ARM::PC); // Rn is PC. + Reg = CTX.getRegisterInfo().getEncodingValue(ARM::PC); // Rn is PC. Imm8 = 0; isAdd = false; // 'U' bit is handled as part of the fixup. @@ -1136,7 +1148,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1161,7 +1173,7 @@ getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, // Encode the shift operation Rs. // Encode Rs bit[11:8]. assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift); + return Binary | (CTX.getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); } unsigned ARMMCCodeEmitter:: @@ -1180,7 +1192,7 @@ getSORegImmOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1219,9 +1231,9 @@ getT2AddrModeSORegOpValue(const MCInst &MI, unsigned OpNum, // Encoded as [Rn, Rm, imm]. // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); Value <<= 4; - Value |= getARMRegisterNumbering(MO2.getReg()); + Value |= CTX.getRegisterInfo().getEncodingValue(MO2.getReg()); Value <<= 2; Value |= MO3.getImm(); @@ -1235,7 +1247,7 @@ getT2AddrModeImm8OpValue(const MCInst &MI, unsigned OpNum, const MCOperand &MO2 = MI.getOperand(OpNum+1); // FIXME: Needs fixup support. - unsigned Value = getARMRegisterNumbering(MO1.getReg()); + unsigned Value = CTX.getRegisterInfo().getEncodingValue(MO1.getReg()); // Even though the immediate is 8 bits long, we need 9 bits in order // to represent the (inverse of the) sign bit. @@ -1297,7 +1309,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx, ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO1.getImm()); // Encode Rm. - unsigned Binary = getARMRegisterNumbering(MO.getReg()); + unsigned Binary = CTX.getRegisterInfo().getEncodingValue(MO.getReg()); // Encode the shift opcode. unsigned SBits = 0; @@ -1353,7 +1365,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, if (SPRRegs || DPRRegs) { // VLDM/VSTM - unsigned RegNo = getARMRegisterNumbering(Reg); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg); unsigned NumRegs = (MI.getNumOperands() - Op) & 0xff; Binary |= (RegNo & 0x1f) << 8; if (SPRRegs) @@ -1362,7 +1374,7 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op, Binary |= NumRegs * 2; } else { for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) { - unsigned RegNo = getARMRegisterNumbering(MI.getOperand(I).getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(MI.getOperand(I).getReg()); Binary |= 1 << RegNo; } } @@ -1378,7 +1390,7 @@ getAddrMode6AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1401,7 +1413,7 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1427,7 +1439,7 @@ getAddrMode6DupAddressOpValue(const MCInst &MI, unsigned Op, const MCOperand &Reg = MI.getOperand(Op); const MCOperand &Imm = MI.getOperand(Op + 1); - unsigned RegNo = getARMRegisterNumbering(Reg.getReg()); + unsigned RegNo = CTX.getRegisterInfo().getEncodingValue(Reg.getReg()); unsigned Align = 0; switch (Imm.getImm()) { @@ -1446,7 +1458,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(Op); if (MO.getReg() == 0) return 0x0D; - return getARMRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } unsigned ARMMCCodeEmitter:: diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 78faf59..a51e0fa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -408,15 +408,22 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. if (Type == macho::RIT_ARM_Half) { - // The other-half value only gets populated for the movt relocation. + // The other-half value only gets populated for the movt and movw + // relocation entries. uint32_t Value = 0;; switch ((unsigned)Fixup.getKind()) { default: break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Value = (FixedValue >> 16) & 0xffff; + break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movt_hi16_pcrel: - Value = FixedValue; + Value = FixedValue & 0xffff; break; } macho::RelocationEntry MREPair; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 2097bb9..e9e20dd 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -563,48 +563,6 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return Offset == 0; } -/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the -/// two-addrss instruction inserted by two-address pass. -void -Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI, - MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const { - if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill()) - return; - - unsigned PredReg = 0; - ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg); - if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return; - - // Schedule the copy so it doesn't come between previous instructions - // and UseMI which can form an IT block. - unsigned SrcReg = SrcMI->getOperand(1).getReg(); - ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); - MachineBasicBlock *MBB = UseMI->getParent(); - MachineBasicBlock::iterator MBBI = SrcMI; - unsigned NumInsts = 0; - while (--MBBI != MBB->begin()) { - if (MBBI->isDebugValue()) - continue; - - MachineInstr *NMI = &*MBBI; - ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg); - if (!(NCC == CC || NCC == OCC) || - NMI->modifiesRegister(SrcReg, &TRI) || - NMI->modifiesRegister(ARM::CPSR, &TRI)) - break; - if (++NumInsts == 4) - // Too many in a row! - return; - } - - if (NumInsts) { - MBB->remove(SrcMI); - MBB->insert(++MBBI, SrcMI); - } -} - ARMCC::CondCodes llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { unsigned Opc = MI->getOpcode(); diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 0911f8a..2cdcd06 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -57,11 +57,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the - /// two-addrss instruction inserted by two-address pass. - void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI, - const TargetRegisterInfo &TRI) const; - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index c8e757b..4ddcd38 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -285,14 +285,14 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { Out << "GlobalValue::LinkerPrivateLinkage"; break; case GlobalValue::LinkerPrivateWeakLinkage: Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break; case GlobalValue::AvailableExternallyLinkage: Out << "GlobalValue::AvailableExternallyLinkage "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "GlobalValue::LinkOnceAnyLinkage "; break; case GlobalValue::LinkOnceODRLinkage: Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "GlobalValue::LinkOnceODRAutoHideLinkage"; break; case GlobalValue::WeakAnyLinkage: Out << "GlobalValue::WeakAnyLinkage"; break; case GlobalValue::WeakODRLinkage: diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 1357cc5..d756aec 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -328,7 +328,10 @@ CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const { // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); const MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI)) { const MachineOperand &MO = MI->getOperand(2); diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c7be5ce..c0c0df6 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -2580,22 +2580,16 @@ let isCall = 1, neverHasSideEffects = 1, } // Tail Calls. -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtg : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNtext : JInst<(outs), (ins calltarget:$dst), "jump $dst // TAILCALL", []>; } -let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, - Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, - R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in { +let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1 in { def TCRETURNR : JInst<(outs), (ins IntRegs:$dst), "jumpr $dst // TAILCALL", []>; } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 5d087db..4bacb8f 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -40,28 +40,27 @@ EnableIEEERndNear( HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): HexagonGenSubtargetInfo(TT, CPU, FS), - HexagonArchVersion(V2), CPUString(CPU.str()) { - ParseSubtargetFeatures(CPU, FS); - switch(HexagonArchVersion) { - case HexagonSubtarget::V2: - break; - case HexagonSubtarget::V3: - EnableV3 = true; - break; - case HexagonSubtarget::V4: - break; - case HexagonSubtarget::V5: - break; - default: - // If the programmer has not specified a Hexagon version, default - // to -mv4. + // If the programmer has not specified a Hexagon version, default to -mv4. + if (CPUString.empty()) CPUString = "hexagonv4"; - HexagonArchVersion = HexagonSubtarget::V4; - break; + + if (CPUString == "hexagonv2") { + HexagonArchVersion = V2; + } else if (CPUString == "hexagonv3") { + EnableV3 = true; + HexagonArchVersion = V3; + } else if (CPUString == "hexagonv4") { + HexagonArchVersion = V4; + } else if (CPUString == "hexagonv5") { + HexagonArchVersion = V5; + } else { + llvm_unreachable("Unrecognized Hexagon processor version"); } + ParseSubtargetFeatures(CPUString, FS); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 786a0c5..05f6fa6 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -183,8 +183,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() || - GV->hasLinkerPrivateWeakDefAutoLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt index 6c7343b..28f5219 100644 --- a/lib/Target/Mips/AsmParser/CMakeLists.txt +++ b/lib/Target/Mips/AsmParser/CMakeLists.txt @@ -1,3 +1,4 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMMipsAsmParser MipsAsmParser.cpp ) diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 58b5590..43bd345 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -11,11 +11,20 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; namespace { class MipsAsmParser : public MCTargetAsmParser { + +#define GET_ASSEMBLER_HEADER +#include "MipsGenAsmMatcher.inc" + bool MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); @@ -23,10 +32,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool ParseInstruction(StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + SmallVectorImpl<MCParsedAsmOperand*> &Operands); bool ParseDirective(AsmToken DirectiveID); + OperandMatchResultTy parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser() { @@ -35,6 +45,57 @@ public: }; } +namespace { + +/// MipsOperand - Instances of this class represent a parsed Mips machine +/// instruction. +class MipsOperand : public MCParsedAsmOperand { + enum KindTy { + k_CondCode, + k_CoprocNum, + k_Immediate, + k_Memory, + k_PostIndexRegister, + k_Register, + k_Token + } Kind; + + MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} +public: + void addRegOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ + llvm_unreachable("unimplemented!"); + } + void addImmOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + void addMemOperands(MCInst &Inst, unsigned N) const { + llvm_unreachable("unimplemented!"); + } + + bool isReg() const { return Kind == k_Register; } + bool isImm() const { return Kind == k_Immediate; } + bool isToken() const { return Kind == k_Token; } + bool isMem() const { return Kind == k_Memory; } + + StringRef getToken() const { + assert(Kind == k_Token && "Invalid access!"); + return ""; + } + + unsigned getReg() const { + assert((Kind == k_Register) && "Invalid access!"); + return 0; + } + + virtual void print(raw_ostream &OS) const { + llvm_unreachable("unimplemented!"); + } +}; +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, @@ -58,6 +119,11 @@ ParseDirective(AsmToken DirectiveID) { return true; } +MipsAsmParser::OperandMatchResultTy MipsAsmParser:: + parseMemOperand(SmallVectorImpl<MCParsedAsmOperand*>&) { + return MatchOperand_ParseFail; +} + extern "C" void LLVMInitializeMipsAsmParser() { RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget); RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index e9a228c..f535c50 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -10,13 +10,18 @@ tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenEDInfo.inc -gen-enhanced-disassembly-info) +tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen + Mips16FrameLowering.cpp + Mips16InstrInfo.cpp + Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp MipsCodeEmitter.cpp MipsDelaySlotFiller.cpp + MipsELFWriterInfo.cpp MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp @@ -26,6 +31,9 @@ add_llvm_target(MipsCodeGen MipsMCInstLower.cpp MipsMachineFunction.cpp MipsRegisterInfo.cpp + MipsSEFrameLowering.cpp + MipsSEInstrInfo.cpp + MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 042b456..aa57472 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -16,6 +16,7 @@ #include "MipsRegisterInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -274,7 +275,8 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -298,13 +300,15 @@ Mips64Disassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; // Calling the auto-generated decoder function. - Result = decodeMips64Instruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips6432, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } // If we fail to decode in Mips64 decoder space we can try in Mips32 - Result = decodeMipsInstruction32(instr, Insn, Address, this, STI); + Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; @@ -379,8 +383,8 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::CPURegsRegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -401,8 +405,8 @@ static DecodeStatus DecodeFMem(MCInst &Inst, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<16>(Insn & 0xffff); - unsigned Reg = fieldFromInstruction32(Insn, 16, 5); - unsigned Base = fieldFromInstruction32(Insn, 21, 5); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); Reg = getReg(Decoder, Mips::FGR64RegClassID, Reg); Base = getReg(Decoder, Mips::CPURegsRegClassID, Base); @@ -484,7 +488,7 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst, uint64_t Address, const void *Decoder) { - unsigned JumpOffset = fieldFromInstruction32(Insn, 0, 26) << 2; + unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2; Inst.addOperand(MCOperand::CreateImm(JumpOffset)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 6fe0c11..18961fd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -35,6 +35,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { return 0; case FK_GPRel_4: case FK_Data_4: + case FK_Data_8: case Mips::fixup_Mips_LO16: case Mips::fixup_Mips_GPOFF_HI: case Mips::fixup_Mips_GPOFF_LO: @@ -59,9 +60,17 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { break; case Mips::fixup_Mips_HI16: case Mips::fixup_Mips_GOT_Local: - // Get the higher 16-bits. Also add 1 if bit 15 is 1. + // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; + case Mips::fixup_Mips_HIGHER: + // Get the 3rd 16-bits. + Value = ((Value + 0x80008000LL) >> 32) & 0xffff; + break; + case Mips::fixup_Mips_HIGHEST: + // Get the 4th 16-bits. + Value = ((Value + 0x800080008000LL) >> 48) & 0xffff; + break; } return Value; @@ -168,7 +177,9 @@ public: { "fixup_Mips_GPOFF_LO", 0, 16, 0 }, { "fixup_Mips_GOT_PAGE", 0, 16, 0 }, { "fixup_Mips_GOT_OFST", 0, 16, 0 }, - { "fixup_Mips_GOT_DISP", 0, 16, 0 } + { "fixup_Mips_GOT_DISP", 0, 16, 0 }, + { "fixup_Mips_HIGHER", 0, 16, 0 }, + { "fixup_Mips_HIGHEST", 0, 16, 0 } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 77c1524..b8489ca 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -34,7 +34,8 @@ namespace { class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: - MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64); + MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, + bool _isN64, bool IsLittleEndian); virtual ~MipsELFObjectWriter(); @@ -53,7 +54,7 @@ namespace { } MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, - bool _isN64) + bool _isN64, bool IsLittleEndian) : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS, /*HasRelocationAddend*/ false, /*IsN64*/ _isN64) {} @@ -103,6 +104,9 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case FK_Data_4: Type = ELF::R_MIPS_32; break; + case FK_Data_8: + Type = ELF::R_MIPS_64; + break; case FK_GPRel_4: Type = ELF::R_MIPS_GPREL32; break; @@ -169,6 +173,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, Type = setRType2((unsigned)ELF::R_MIPS_SUB, Type); Type = setRType3((unsigned)ELF::R_MIPS_LO16, Type); break; + case Mips::fixup_Mips_HIGHER: + Type = ELF::R_MIPS_HIGHER; + break; + case Mips::fixup_Mips_HIGHEST: + Type = ELF::R_MIPS_HIGHEST; + break; } return Type; } @@ -265,6 +275,7 @@ MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, bool IsLittleEndian, bool Is64Bit) { MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI, - (Is64Bit) ? true : false); + (Is64Bit) ? true : false, + IsLittleEndian); return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index f5cbbd5..77faec5 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -110,6 +110,12 @@ namespace Mips { // resulting in - R_MIPS_GOT_DISP fixup_Mips_GOT_DISP, + // resulting in - R_MIPS_GOT_HIGHER + fixup_Mips_HIGHER, + + // resulting in - R_MIPS_HIGHEST + fixup_Mips_HIGHEST, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index ff3b3a7..8dab62d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -255,6 +255,12 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, case MCSymbolRefExpr::VK_Mips_TPREL_LO: FixupKind = Mips::fixup_Mips_TPREL_LO; break; + case MCSymbolRefExpr::VK_Mips_HIGHER: + FixupKind = Mips::fixup_Mips_HIGHER; + break; + case MCSymbolRefExpr::VK_Mips_HIGHEST: + FixupKind = Mips::fixup_Mips_HIGHEST; + break; } // switch Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind))); diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index 596f071..93de517 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -16,7 +16,9 @@ BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ - MipsGenEDInfo.inc MipsGenDisassemblerTables.inc + MipsGenEDInfo.inc MipsGenDisassemblerTables.inc \ + MipsGenAsmMatcher.inc + DIRS = InstPrinter Disassembler AsmParser TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 8548ae0..7cec531 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -44,6 +44,8 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; +def FeatureAndroid : SubtargetFeature<"android", "IsAndroid", "true", + "Target is android">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true", @@ -93,9 +95,20 @@ def MipsAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def MipsAsmParser : AsmParser { + let ShouldEmitMatchRegisterName = 0; +} + +def MipsAsmParserVariant : AsmParserVariant { + int Variant = 0; + + // Recognize hard coded registers. + string RegisterPrefix = "$"; +} + def Mips : Target { let InstructionSet = MipsInstrInfo; - + let AssemblyParsers = [MipsAsmParser]; let AssemblyWriters = [MipsAsmWriter]; + let AssemblyParserVariants = [MipsAsmParserVariant]; } - diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp new file mode 100644 index 0000000..030042f --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -0,0 +1,87 @@ +//===-- Mips16FrameLowering.cpp - Mips16 Frame Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16FrameLowering.h" +#include "MipsInstrInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + // Adjust stack. + if (isInt<16>(-StackSize)) + BuildMI(MBB, MBBI, dl, TII.get(Mips::SaveRaF16)).addImm(StackSize); +} + +void Mips16FrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsInstrInfo &TII = + *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + if (isInt<16>(StackSize)) + // assumes stacksize multiple of 8 + BuildMI(MBB, MBBI, dl, TII.get(Mips::RestoreRaF16)).addImm(StackSize); +} + +bool Mips16FrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + // FIXME: implement. + return true; +} + +bool +Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + // FIXME: implement. + return true; +} + +void Mips16FrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { +} + +const MipsFrameLowering * +llvm::createMips16FrameLowering(const MipsSubtarget &ST) { + return new Mips16FrameLowering(ST); +} diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h new file mode 100644 index 0000000..25cc37b --- /dev/null +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -0,0 +1,43 @@ +//===-- Mips16FrameLowering.h - Mips16 frame lowering ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16_FRAMEINFO_H +#define MIPS16_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { +class Mips16FrameLowering : public MipsFrameLowering { +public: + explicit Mips16FrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp new file mode 100644 index 0000000..2bc286b --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -0,0 +1,132 @@ +//===-- Mips16InstrInfo.cpp - Mips16 Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16InstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0), + RI(*tm.getSubtargetImpl(), *this) {} + +const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned Mips16InstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned Mips16InstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + return 0; +} + +void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::Mov32R16; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void Mips16InstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +void Mips16InstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + assert(false && "Implement this function."); +} + +bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA16: + ExpandRetRA16(MBB, MI, Mips::JrRa16); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + assert(false && "Implement this function."); + return 0; +} + +unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return 0; +} + +void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); +} + +const MipsInstrInfo *llvm::createMips16InstrInfo(MipsTargetMachine &TM) { + return new Mips16InstrInfo(TM); +} diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h new file mode 100644 index 0000000..260c5b6 --- /dev/null +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -0,0 +1,76 @@ +//===-- Mips16InstrInfo.h - Mips16 Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16INSTRUCTIONINFO_H +#define MIPS16INSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "Mips16RegisterInfo.h" + +namespace llvm { + +class Mips16InstrInfo : public MipsInstrInfo { + const Mips16RegisterInfo RI; + +public: + explicit Mips16InstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index c852042..94cf984 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -11,10 +11,6 @@ // //===----------------------------------------------------------------------===// -def uimm5 : Operand<i8> { - let DecoderMethod= "DecodeSimm16"; -} - // // RRR-type instruction format // @@ -46,9 +42,32 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr, class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>: FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>; + +class FEXT_2RI16_ins<bits<5> _op, string asmstr, + InstrItinClass itin>: + FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm), + !strconcat(asmstr, "\t$rx, $imm"), [], itin> { + let Constraints = "$rx_ = $rx"; +} + + // // RR-type instruction format // + +class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry), + !strconcat(asmstr, "\t$rx, $ry"), [], itin> { +} + +class FRxRxRy16_ins<bits<5> f, string asmstr, + InstrItinClass itin> : + FRR16<f, (outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry), + !strconcat(asmstr, "\t$rz, $ry"), + [], itin> { + let Constraints = "$rx = $rz"; +} + let rx=0 in class FRR16_JALRC_RA_only_ins<bits<1> nd_, bits<1> l_, string asmstr, InstrItinClass itin>: @@ -64,11 +83,16 @@ class FEXT_RRI16_mem_ins<bits<5> op, string asmstr, Operand MemOpnd, FEXT_RRI16<op, (outs CPU16Regs:$ry), (ins MemOpnd:$addr), !strconcat(asmstr, "\t$ry, $addr"), [], itin>; +class FEXT_RRI16_mem2_ins<bits<5> op, string asmstr, Operand MemOpnd, + InstrItinClass itin>: + FEXT_RRI16<op, (outs ), (ins CPU16Regs:$ry, MemOpnd:$addr), + !strconcat(asmstr, "\t$ry, $addr"), [], itin>; + // // EXT-SHIFT instruction format // class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>: - FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, uimm5:$sa), + FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa), !strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>; // @@ -80,20 +104,49 @@ def mem16 : Operand<i32> { } // +// Some general instruction class info +// +// + +class ArithLogic16Defs<bit isCom=0> { + bits<5> shamt = 0; + bit isCommutable = isCom; + bit isReMaterializable = 1; + bit neverHasSideEffects = 1; +} + +// + +// Format: ADDIU rx, immediate MIPS16e +// Purpose: Add Immediate Unsigned Word (2-Operand, Extended) +// To add a constant to a 32-bit integer. +// +def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; + +def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, + ArithLogic16Defs<0>; + +// + // Format: ADDIU rx, pc, immediate MIPS16e // Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended) // To add a constant to the program counter. // -class AddiuRxPcImmX16_base : FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; -def AddiuRxPcImmX16 : AddiuRxPcImmX16_base; +def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // // Format: ADDU rz, rx, ry MIPS16e // Purpose: Add Unsigned Word (3-Operand) // To add 32-bit integers. // -class AdduRxRyRz16_base: FRRR16_ins<01, "addu", IIAlu>; -def AdduRxRyRz16: AdduRxRyRz16_base; +def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; + +// +// Format: AND rx, ry MIPS16e +// Purpose: AND +// To do a bitwise logical AND. + +def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // // Format: JR ra MIPS16e @@ -105,6 +158,34 @@ def AdduRxRyRz16: AdduRxRyRz16_base; def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu>; // +// Format: LB ry, offset(rx) MIPS16e +// Purpose: Load Byte (Extended) +// To load a byte from memory as a signed value. +// +def LbRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lb", mem16, IIAlu>; + +// +// Format: LBU ry, offset(rx) MIPS16e +// Purpose: Load Byte Unsigned (Extended) +// To load a byte from memory as a unsigned value. +// +def LbuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lbu", mem16, IIAlu>; + +// +// Format: LH ry, offset(rx) MIPS16e +// Purpose: Load Halfword signed (Extended) +// To load a halfword from memory as a signed value. +// +def LhRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lh", mem16, IIAlu>; + +// +// Format: LHU ry, offset(rx) MIPS16e +// Purpose: Load Halfword unsigned (Extended) +// To load a halfword from memory as an unsigned value. +// +def LhuRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10100, "lhu", mem16, IIAlu>; + +// // Format: LI rx, immediate MIPS16e // Purpose: Load Immediate (Extended) // To load a constant into a GPR. @@ -116,8 +197,7 @@ def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; // Purpose: Load Word (Extended) // To load a word from memory as a signed value. // -class LwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; -def LwRxRyOffMemX16: LwRxRyOffMemX16_base; +def LwRxRyOffMemX16: FEXT_RRI16_mem_ins<0b10011, "lw", mem16, IIAlu>; // // Format: MOVE r32, rz MIPS16e @@ -125,6 +205,28 @@ def LwRxRyOffMemX16: LwRxRyOffMemX16_base; // To move the contents of a GPR to a GPR. // def Mov32R16: FI8_MOV32R16_ins<"move", IIAlu>; + +// +// Format: NEG rx, ry MIPS16e +// Purpose: Negate +// To negate an integer value. +// +def NegRxRy16: FRR16_ins<0b11101, "neg", IIAlu>; + +// +// Format: NOT rx, ry MIPS16e +// Purpose: Not +// To complement an integer value +// +def NotRxRy16: FRR16_ins<0b01111, "not", IIAlu>; + +// +// Format: OR rx, ry MIPS16e +// Purpose: Or +// To do a bitwise logical OR. +// +def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; + // // Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize} // (All args are optional) MIPS16e @@ -156,6 +258,20 @@ def SaveRaF16: "save \t$$ra, $frame_size", [], IILoad >; // +// Format: SB ry, offset(rx) MIPS16e +// Purpose: Store Byte (Extended) +// To store a byte to memory. +// +def SbRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11000, "sb", mem16, IIAlu>; + +// +// Format: SH ry, offset(rx) MIPS16e +// Purpose: Store Halfword (Extended) +// To store a halfword to memory. +// +def ShRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11001, "sh", mem16, IIAlu>; + +// // Format: SLL rx, ry, sa MIPS16e // Purpose: Shift Word Left Logical (Extended) // To execute a left-shift of a word by a fixed number of bits—0 to 31 bits. @@ -163,57 +279,127 @@ def SaveRaF16: def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; // +// Format: SLLV ry, rx MIPS16e +// Purpose: Shift Word Left Logical Variable +// To execute a left-shift of a word by a variable number of bits. +// +def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; + + +// +// Format: SRAV ry, rx MIPS16e +// Purpose: Shift Word Right Arithmetic Variable +// To execute an arithmetic right-shift of a word by a variable +// number of bits. +// +def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; + + +// +// Format: SRA rx, ry, sa MIPS16e +// Purpose: Shift Word Right Arithmetic (Extended) +// To execute an arithmetic right-shift of a word by a fixed +// number of bits—1 to 8 bits. +// +def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; + + +// +// Format: SRLV ry, rx MIPS16e +// Purpose: Shift Word Right Logical Variable +// To execute a logical right-shift of a word by a variable +// number of bits. +// +def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; + + +// +// Format: SRL rx, ry, sa MIPS16e +// Purpose: Shift Word Right Logical (Extended) +// To execute a logical right-shift of a word by a fixed +// number of bits—1 to 31 bits. +// +def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>; + +// +// Format: SUBU rz, rx, ry MIPS16e +// Purpose: Subtract Unsigned Word +// To subtract 32-bit integers +// +def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; + +// // Format: SW ry, offset(rx) MIPS16e // Purpose: Store Word (Extended) // To store a word to memory. // -class SwRxRyOffMemX16_base: FEXT_RRI16_mem_ins<0b11011, "sw", mem16, IIAlu>; -def SwRxRyOffMemX16: SwRxRyOffMemX16_base; +def SwRxRyOffMemX16: FEXT_RRI16_mem2_ins<0b11011, "sw", mem16, IIAlu>; + +// +// Format: XOR rx, ry MIPS16e +// Purpose: Xor +// To do a bitwise logical XOR. +// +def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>; class Mips16Pat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [InMips16Mode]; } -class ArithLogicR16Defs<SDNode OpNode, bit isComm = 0> { - dag OutOperandList = (outs CPU16Regs:$rz); - dag InOperandList = (ins CPU16Regs:$rx, CPU16Regs:$ry); - list<dag> Pattern = [(set CPU16Regs:$rz, - (OpNode CPU16Regs:$rx, CPU16Regs:$ry))]; -} +// Unary Arith/Logic +// +class ArithLogicU_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r), + (I CPU16Regs:$r)>; -multiclass ArithLogicR16_base { - def _add: AdduRxRyRz16_base, ArithLogicR16Defs<add, 1>; -} +def: ArithLogicU_pat<not, NotRxRy16>; +def: ArithLogicU_pat<ineg, NegRxRy16>; -defm ArithLogicR16_patt : ArithLogicR16_base; +class ArithLogic16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$l, CPU16Regs:$r), + (I CPU16Regs:$l, CPU16Regs:$r)>; -class LoadM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs CPU16Regs:$ry); - dag InOperandList = (ins MemOpnd:$addr); - list<dag> Pattern = [(set CPU16Regs:$ry, (OpNode addr:$addr))]; -} +def: ArithLogic16_pat<add, AdduRxRyRz16>; +def: ArithLogic16_pat<and, AndRxRxRy16>; +def: ArithLogic16_pat<or, OrRxRxRy16>; +def: ArithLogic16_pat<sub, SubuRxRyRz16>; +def: ArithLogic16_pat<xor, XorRxRxRy16>; -multiclass LoadM16_base { - def _LwRxRyOffMemX16: LwRxRyOffMemX16_base, LoadM16Defs<load_a, mem16>; -} +// Arithmetic and logical instructions with 2 register operands. -defm LoadM16: LoadM16_base; +class ArithLogicI16_pat<SDNode OpNode, PatFrag imm_type, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$in, imm_type:$imm), + (I CPU16Regs:$in, imm_type:$imm)>; -class StoreM16Defs<PatFrag OpNode, Operand _MemOpnd, bit Pseudo=0> { - bit isPseudo = Pseudo; - Operand MemOpnd = _MemOpnd; - dag OutOperandList = (outs ); - dag InOperandList = (ins CPU16Regs:$ry, MemOpnd:$addr); - list<dag> Pattern = [(OpNode CPU16Regs:$ry, addr:$addr)]; -} +def: ArithLogicI16_pat<add, immSExt16, AddiuRxRxImmX16>; +def: ArithLogicI16_pat<shl, immZExt5, SllX16>; +def: ArithLogicI16_pat<srl, immZExt5, SrlX16>; +def: ArithLogicI16_pat<sra, immZExt5, SraX16>; -multiclass StoreM16_base { - def _SwRxRyOffMemX16: SwRxRyOffMemX16_base, StoreM16Defs<store_a, mem16>; -} +class shift_rotate_reg16_pat<SDNode OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, CPU16Regs:$ra), + (I CPU16Regs:$r, CPU16Regs:$ra)>; + +def: shift_rotate_reg16_pat<shl, SllvRxRy16>; +def: shift_rotate_reg16_pat<sra, SravRxRy16>; +def: shift_rotate_reg16_pat<srl, SrlvRxRy16>; + +class LoadM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode addr:$addr), (I addr:$addr)>; + +def: LoadM16_pat<sextloadi8, LbRxRyOffMemX16>; +def: LoadM16_pat<zextloadi8, LbuRxRyOffMemX16>; +def: LoadM16_pat<sextloadi16_a, LhRxRyOffMemX16>; +def: LoadM16_pat<zextloadi16_a, LhuRxRyOffMemX16>; +def: LoadM16_pat<load_a, LwRxRyOffMemX16>; + +class StoreM16_pat<PatFrag OpNode, Instruction I> : + Mips16Pat<(OpNode CPU16Regs:$r, addr:$addr), (I CPU16Regs:$r, addr:$addr)>; + +def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>; +def: StoreM16_pat<truncstorei16_a, ShRxRyOffMemX16>; +def: StoreM16_pat<store_a, SwRxRyOffMemX16>; -defm StoreM16: StoreM16_base; // Jump and Link (Call) let isCall=1, hasDelaySlot=1 in @@ -226,18 +412,8 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1, hasExtraSrcRegAllocReq = 1 in def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -// This is basically deprecated code but needs to be there for things -// to work. -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN16 : MipsPseudo16<(outs), (ins uimm16:$amt), - ";", - [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP16 : MipsPseudo16<(outs), (ins uimm16:$amt1, uimm16:$amt2), - ";", - [(callseq_end timm:$amt1, timm:$amt2)]>; -} - // Small immediates -def : Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; -def : Mips16Pat<(MipsLo tglobaladdr:$in), (LiRxImmX16 tglobaladdr:$in)>; +def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; + +def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)), + (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp new file mode 100644 index 0000000..c15d1bf --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -0,0 +1,111 @@ +//===-- Mips16RegisterInfo.cpp - MIPS16 Register Information -== ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS16 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "Mips16RegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void Mips16RegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always + // referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object + // is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, + // its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); + + +} diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h new file mode 100644 index 0000000..3f4b3a7 --- /dev/null +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -0,0 +1,37 @@ +//===-- Mips16RegisterInfo.h - Mips16 Register Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips16 implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16REGISTERINFO_H +#define MIPS16REGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class Mips16RegisterInfo : public MipsRegisterInfo { +public: + Mips16RegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index cceee24..20fc178 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -208,26 +208,25 @@ def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>; def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>; -def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; +def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>; } let Uses = [SP_64], DecoderNamespace = "Mips64" in -def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, - Requires<[IsN64, HasStandardEncoding]> { - let isCodeGenOnly = 1; -} +def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, + Requires<[IsN64, HasStandardEncoding]>; let DecoderNamespace = "Mips64" in { def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>; def DEXT : ExtBase<3, "dext", CPU64Regs>; def DINS : InsBase<7, "dins", CPU64Regs>; -def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "dsll\t$rd, $rt, 32", [], IIAlu>; -def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; -let isCodeGenOnly = 1 in -def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), - "sll\t$rd, $rt, 0", [], IIAlu>; +let isCodeGenOnly = 1, rs = 0, shamt = 0 in { + def DSLL64_32 : FR<0x00, 0x3c, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "dsll\t$rd, $rt, 32", [], IIAlu>; + def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; + def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt), + "sll\t$rd, $rt, 0", [], IIAlu>; +} } //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 8aadefd..19213fa 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -145,6 +145,17 @@ def RetCC_MipsEABI : CallingConv<[ ]>; //===----------------------------------------------------------------------===// +// Mips Android Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MipsAndroid : CallingConv<[ + // f32 are returned in registers F0, F2, F1, F3 + CCIfType<[f32], CCAssignToReg<[F0, F2, F1, F3]>>, + + CCDelegateTo<RetCC_MipsO32> +]>; + +//===----------------------------------------------------------------------===// // Mips FastCC Calling Convention //===----------------------------------------------------------------------===// def CC_MipsO32_FastCC : CallingConv<[ @@ -210,6 +221,7 @@ def RetCC_Mips : CallingConv<[ CCIfSubtarget<"isABI_EABI()", CCDelegateTo<RetCC_MipsEABI>>, CCIfSubtarget<"isABI_N32()", CCDelegateTo<RetCC_MipsN>>, CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>, + CCIfSubtarget<"isAndroid()", CCDelegateTo<RetCC_MipsAndroid>>, CCDelegateTo<RetCC_MipsO32> ]>; diff --git a/lib/Target/Mips/MipsELFWriterInfo.cpp b/lib/Target/Mips/MipsELFWriterInfo.cpp new file mode 100644 index 0000000..ac3a547 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.cpp @@ -0,0 +1,92 @@ +//===-- MipsELFWriterInfo.cpp - ELF Writer Info for the Mips backend ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#include "MipsELFWriterInfo.h" +#include "MipsRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the MipsELFWriterInfo class +//===----------------------------------------------------------------------===// + +MipsELFWriterInfo::MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_) + : TargetELFWriterInfo(is64Bit_, isLittleEndian_) { + EMachine = EM_MIPS; +} + +MipsELFWriterInfo::~MipsELFWriterInfo() {} + +unsigned MipsELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch(MachineRelTy) { + case Mips::reloc_mips_pc16: + return ELF::R_MIPS_GOT16; + case Mips::reloc_mips_hi: + return ELF::R_MIPS_HI16; + case Mips::reloc_mips_lo: + return ELF::R_MIPS_LO16; + case Mips::reloc_mips_26: + return ELF::R_MIPS_26; + default: + llvm_unreachable("unknown Mips machine relocation type"); + } +} + +long int MipsELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + switch(RelTy) { + case ELF::R_MIPS_26: return Modifier; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + case ELF::R_MIPS_26: + return 32; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +bool MipsELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + switch(RelTy) { + case ELF::R_MIPS_GOT16: + return true; + case ELF::R_MIPS_26: + return false; + default: + llvm_unreachable("unknown Mips relocation type"); + } +} + +unsigned MipsELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + return Mips::reloc_mips_26; +} + +long int MipsELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + + if (RelTy == ELF::R_MIPS_GOT16) + return SymOffset - (RelOffset + 4); + + llvm_unreachable("computeRelocation unknown for this relocation type"); +} diff --git a/lib/Target/Mips/MipsELFWriterInfo.h b/lib/Target/Mips/MipsELFWriterInfo.h new file mode 100644 index 0000000..23f3f03 --- /dev/null +++ b/lib/Target/Mips/MipsELFWriterInfo.h @@ -0,0 +1,59 @@ +//===-- MipsELFWriterInfo.h - ELF Writer Info for Mips ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the Mips backend. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS_ELF_WRITER_INFO_H +#define MIPS_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class MipsELFWriterInfo : public TargetELFWriterInfo { + + public: + MipsELFWriterInfo(bool is64Bit_, bool isLittleEndian_); + virtual ~MipsELFWriterInfo(); + + /// getRelocationType - Returns the target specific ELF Relocation type. + /// 'MachineRelTy' contains the object code independent relocation type + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return is64Bit ? true : false; } + + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelTySize - Returns the size of relocatable field in bits + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getJumpTableRelocationTy - Returns the machine relocation type used + /// to reference a jumptable. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields could be relocated + /// directly, avoiding the relocation symbol emission, compute the + /// final relocation value for this symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; + }; + +} // end llvm namespace + +#endif // MIPS_ELF_WRITER_INFO_H diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 6338f3c..8c0474b 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -15,6 +15,7 @@ #include "MipsAnalyzeImmediate.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MipsTargetMachine.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -81,6 +82,14 @@ using namespace llvm; // //===----------------------------------------------------------------------===// +const MipsFrameLowering *MipsFrameLowering::create(MipsTargetMachine &TM, + const MipsSubtarget &ST) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16FrameLowering(ST); + + return llvm::createMipsSEFrameLowering(ST); +} + // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. @@ -89,218 +98,3 @@ bool MipsFrameLowering::hasFP(const MachineFunction &MF) const { return MF.getTarget().Options.DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); } - -bool MipsFrameLowering::targetHandlesStackFrameRounding() const { - return true; -} - -void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - const MipsRegisterInfo *RegInfo = - static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // First, compute final stack size. - unsigned StackAlign = getStackAlignment(); - uint64_t StackSize = RoundUpToAlignment(MFI->getStackSize(), StackAlign); - - if (MipsFI->globalBaseRegSet()) - StackSize += MFI->getObjectOffset(MipsFI->getGlobalRegFI()) + StackAlign; - else - StackSize += RoundUpToAlignment(MipsFI->getMaxCallFrameSize(), StackAlign); - - // Update stack size - MFI->setStackSize(StackSize); - - // No need to allocate space on the stack. - if (StackSize == 0 && !MFI->adjustsStack()) return; - - MachineModuleInfo &MMI = MF.getMMI(); - std::vector<MachineMove> &Moves = MMI.getFrameMoves(); - MachineLocation DstML, SrcML; - - // Adjust stack. - if (isInt<16>(-StackSize)) {// addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - BuildMI(MBB, MBBI, dl, - TII.get(Mips::SaveRaF16)).addImm(StackSize); // cleanup - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - - MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(-StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, - 0); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); - } - - // emit ".cfi_def_cfa_offset StackSize" - MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); - DstML = MachineLocation(MachineLocation::VirtualFP); - SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); - Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); - - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - - if (CSI.size()) { - // Find the instruction past the last instruction that saves a callee-saved - // register to the stack. - for (unsigned i = 0; i < CSI.size(); ++i) - ++MBBI; - - // Iterate over list of callee-saved registers and emit .cfi_offset - // directives. - MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); - - for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); - unsigned Reg = I->getReg(); - - // If Reg is a double precision register, emit two cfa_offsets, - // one for each of the paired single precision registers. - if (Mips::AFGR64RegClass.contains(Reg)) { - MachineLocation DstML0(MachineLocation::VirtualFP, Offset); - MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); - MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); - MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); - - if (!STI.isLittle()) - std::swap(SrcML0, SrcML1); - - Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); - Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); - } else { - // Reg is either in CPURegs or FGR32. - DstML = MachineLocation(MachineLocation::VirtualFP, Offset); - SrcML = MachineLocation(Reg); - Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); - } - } - } - - // if framepointer enabled, set it to point to the stack pointer. - if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); - - // emit ".cfi_def_cfa_register $fp" - MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); - BuildMI(MBB, MBBI, dl, - TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); - DstML = MachineLocation(FP); - SrcML = MachineLocation(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); - } -} - -void MipsFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const MipsInstrInfo &TII = - *static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); - DebugLoc dl = MBBI->getDebugLoc(); - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; - unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; - - // if framepointer enabled, restore the stack pointer. - if (hasFP(MF)) { - // Find the first instruction that restores a callee-saved register. - MachineBasicBlock::iterator I = MBBI; - - for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) - --I; - - // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); - } - - // Get the number of bytes from FrameInfo - uint64_t StackSize = MFI->getStackSize(); - - if (!StackSize) - return; - - // Adjust stack. - if (isInt<16>(StackSize)) { // addi sp, sp, (-stacksize) - if (STI.inMips16Mode()) - // assumes stacksize multiple of 8 - BuildMI(MBB, MBBI, dl, - TII.get(Mips::RestoreRaF16)).addImm(StackSize); - else - BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize); - } - else { // Expand immediate that doesn't fit in 16-bit. - unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; - - MF.getInfo<MipsFunctionInfo>()->setEmitNOAT(); - Mips::loadImmediate(StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl, false, - 0); - BuildMI(MBB, MBBI, dl, TII.get(ADDu), SP).addReg(SP).addReg(ATReg); - } -} - -void MipsFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; - - // FIXME: remove this code if register allocator can correctly mark - // $fp and $ra used or unused. - - // Mark $fp and $ra as used or unused. - if (hasFP(MF)) - MRI.setPhysRegUsed(FP); -} - -bool MipsFrameLowering:: -spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); - const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); - - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. Do not add if the register is - // RA and return address is taken, because it has already been added in - // method MipsTargetLowering::LowerRETURNADDR. - // It's killed at the spill, unless the register is RA and return address - // is taken. - unsigned Reg = CSI[i].getReg(); - bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) - && MF->getFrameInfo()->isReturnAddressTaken(); - if (!IsRAAndRetAddrIsTaken) - EntryBlock->addLiveIn(Reg); - - // Insert the spill to the stack frame. - bool IsKill = !IsRAAndRetAddrIsTaken; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, - CSI[i].getFrameIdx(), RC, TRI); - } - - return true; -} diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index e364ded..ed7b7fe 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -27,28 +27,19 @@ protected: public: explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0), - STI(sti) { - } + : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0, + sti.hasMips64() ? 16 : 8), STI(sti) {} - bool targetHandlesStackFrameRounding() const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + static const MipsFrameLowering *create(MipsTargetMachine &TM, + const MipsSubtarget &ST); bool hasFP(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; }; +/// Create MipsInstrInfo objects. +const MipsFrameLowering *createMips16FrameLowering(const MipsSubtarget &ST); +const MipsFrameLowering *createMipsSEFrameLowering(const MipsSubtarget &ST); + } // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index ea33b74..5a97c17 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -117,28 +117,23 @@ private: void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - if (((MF.getTarget().getRelocationModel() == Reloc::Static) || - Subtarget.inMips16Mode()) && !MipsFI->globalBaseRegSet()) + if (!MipsFI->globalBaseRegSet()) return; MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const MipsRegisterInfo *TargetRegInfo = TM.getRegisterInfo(); - const MipsInstrInfo *MII = TM.getInstrInfo(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - int FI = 0; + const TargetRegisterClass *RC; - FI= MipsFI->initGlobalRegFI(); - - const TargetRegisterClass *RC = Subtarget.isABI_N64() ? - (const TargetRegisterClass*)&Mips::CPU64RegsRegClass : - (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - if (Subtarget.inMips16Mode()) - RC=(const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else if (Subtarget.inMips16Mode()) + RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); @@ -158,23 +153,17 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { .addReg(Mips::T9_64); BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } if (Subtarget.inMips16Mode()) { BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), - V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), - V2 ).addReg(V0).addImm(16); + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) .addReg(V1).addReg(V2); - - return; } @@ -203,19 +192,11 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, - TargetRegInfo); return; } assert(Subtarget.isABI_O32()); - - //if (Subtarget.inMips16Mode()) - // return; // no need to load GP. It can be calculated anywhere - - - // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: // @@ -237,7 +218,6 @@ void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { MBB.addLiveIn(Mips::V0); BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) .addReg(Mips::V0).addReg(Mips::T9); - MII->storeRegToStackSlot(MBB, I, GlobalBaseReg, false, FI, RC, TargetRegInfo); } bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, @@ -262,13 +242,14 @@ bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, // Replace uses with ZeroReg. for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E; ++U) { + E = MRI->use_end(); U != E;) { MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); MachineInstr *MI = MO.getParent(); + ++U; // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()) || - MI->isPseudo()) + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) continue; MO.setReg(ZeroReg); @@ -309,21 +290,6 @@ bool MipsDAGToDAGISel:: SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { EVT ValTy = Addr.getValueType(); - // If Parent is an unaligned f32 load or store, select a (base + index) - // floating point load/store instruction (luxc1 or suxc1). - const LSBaseSDNode *LS = 0; - - if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) { - EVT VT = LS->getMemoryVT(); - - if (VT.getSizeInBits() / 8 > LS->getAlignment()) { - assert(TLI.allowsUnalignedMemoryAccesses(VT) && - "Unaligned loads/stores not supported for this type."); - if (VT == MVT::f32) - return false; - } - } - // if Address is FI, get the TargetFrameIndex. if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); @@ -382,6 +348,8 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { } // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && Subtarget.hasMips32r2Or64()) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 7741f9f..c5207c6 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -157,7 +157,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); @@ -178,7 +177,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); } @@ -217,6 +215,8 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget->hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); @@ -314,8 +314,6 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i64: case MVT::i32: return true; - case MVT::f32: - return Subtarget->hasMips32r2Or64(); default: return false; } @@ -794,7 +792,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -1504,42 +1501,6 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const -{ - MachineFunction &MF = DAG.getMachineFunction(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP; - - assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= - cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() && - "Cannot lower if the alignment of the allocated space is larger than \ - that of the stack."); - - SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); - - // Subtract the dynamic size from the actual stack size to - // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); - - // The Sub result contains the new stack start address, so it - // must be placed in the stack pointer register. - Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); - - // This node always has two return values: a new stack pointer - // value and a chain - SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); - SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); - SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; - - return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3); -} - -SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is @@ -2455,9 +2416,9 @@ static unsigned getNextIntArgReg(unsigned Reg) { // Write ByVal Arg to arg registers and stack. static void -WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +WriteByValArg(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, MVT PtrType, bool isLittle) { @@ -2531,24 +2492,24 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, return; } - // Create a fixed object on stack at offset LocMemOffset and copy - // remaining part of byval arg to it using memcpy. + // Copy remaining part of byval arg using memcpy. SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, DAG.getConstant(Offset, MVT::i32)); - LastFI = MFI->CreateFixedObject(RemainingSize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(RemainingSize, MVT::i32), - std::min(ByValAlign, (unsigned)4), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(RemainingSize, MVT::i32), + std::min(ByValAlign, (unsigned)4), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } // Copy Mips64 byVal arg to registers and stack. void static -PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, +PassByValArg64(SDValue Chain, DebugLoc dl, SmallVector<std::pair<unsigned, SDValue>, 16> &RegsToPass, - SmallVector<SDValue, 8> &MemOpChains, int &LastFI, + SmallVector<SDValue, 8> &MemOpChains, SDValue StackPtr, MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, const CCValAssign &VA, const ISD::ArgFlagsTy &Flags, EVT PtrTy, bool isLittle) { @@ -2620,16 +2581,16 @@ PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, assert(MemCpySize && "MemCpySize must not be zero."); - // Create a fixed object on stack at offset LocMemOffset and copy - // remainder of byval arg to it with memcpy. + // Copy remainder of byval arg to it with memcpy. SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, DAG.getConstant(Offset, PtrTy)); - LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); - SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); - ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, - DAG.getConstant(MemCpySize, PtrTy), Alignment, - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); + SDValue Dst = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, + DAG.getIntPtrConstant(LocMemOffset)); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } /// LowerCall - functions arguments are copied from virtual regs to @@ -2643,9 +2604,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; SmallVector<SDValue, 32> &OutVals = CLI.OutVals; SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue InChain = CLI.Chain; + SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - SDValue CalleeSave = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; @@ -2675,18 +2635,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); - - // Chain is the output chain of the last Load/Store or CopyToReg node. - // ByValChain is the output chain of the last Memcpy node created for copying - // byval arguments to the stack. - SDValue Chain, CallSeqStart, ByValChain; - SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); - Chain = CallSeqStart = DAG.getCALLSEQ_START(InChain, NextStackOffsetVal); - ByValChain = InChain; - - // Get the frame index of the stack frame object that points to the location - // of dynamically allocated area on the stack. - int DynAllocFI = MipsFI->getDynAllocFI(); + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = RoundUpToAlignment(NextStackOffset, StackAlignment); // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is @@ -2694,27 +2644,23 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (IsO32 && (CallConv != CallingConv::Fast)) NextStackOffset = std::max(NextStackOffset, (unsigned)16); - unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); - - if (MaxCallFrameSize < NextStackOffset) { - MipsFI->setMaxCallFrameSize(NextStackOffset); + // Chain is the output chain of the last Load/Store or CopyToReg node. + // ByValChain is the output chain of the last Memcpy node created for copying + // byval arguments to the stack. + SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true); + Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal); - // Set the offsets relative to $sp of the $gp restore slot and dynamically - // allocated stack space. These offsets must be aligned to a boundary - // determined by the stack alignment of the ABI. - unsigned StackAlignment = TFL->getStackAlignment(); - NextStackOffset = (NextStackOffset + StackAlignment - 1) / - StackAlignment * StackAlignment; + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, + IsN64 ? Mips::SP_64 : Mips::SP, + getPointerTy()); - MFI->setObjectOffset(DynAllocFI, NextStackOffset); - } + if (MipsFI->getMaxCallFrameSize() < NextStackOffset) + MipsFI->setMaxCallFrameSize(NextStackOffset); // With EABI is it possible to have 16 args on registers. SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; - // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; @@ -2727,11 +2673,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); if (IsO32) - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + WriteByValArg(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); else - PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + PassByValArg64(Chain, dl, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); continue; @@ -2781,29 +2727,14 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Register can't get to this point... assert(VA.isMemLoc()); - // Create the frame index object for this incoming parameter - LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), true); - SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); - // emit ISD::STORE whichs stores the // parameter value to a stack Location + SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); } - // Extend range of indices of frame objects for outgoing arguments that were - // created during this function call. Skip this step if no such objects were - // created. - if (LastFI) - MipsFI->extendOutArgFIRange(FirstFI, LastFI); - - // If a memcpy has been created to copy a byval arg to a stack, replace the - // chain input of CallSeqStart with ByValChain. - if (InChain != ByValChain) - DAG.UpdateNodeOperands(CallSeqStart.getNode(), ByValChain, - NextStackOffsetVal); - // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) @@ -2867,6 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } + // T9 register operand. + SDValue T9; + // T9 should contain the address of the callee function if // -reloction-model=pic or it is an indirect call. if (IsPICCall || !GlobalOrExternal) { @@ -2874,7 +2808,11 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); - Callee = DAG.getRegister(T9Reg, getPointerTy()); + + if (Subtarget->inMips16Mode()) + T9 = DAG.getRegister(T9Reg, getPointerTy()); + else + Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Insert node "GP copy globalreg" before call to function. @@ -2902,7 +2840,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops; Ops.push_back(Chain); - Ops.push_back(Subtarget->inMips16Mode()? CalleeSave: Callee); + Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. @@ -2910,8 +2848,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); - if (Subtarget->inMips16Mode()) - Ops.push_back(Callee); + // Add T9 register operand. + if (T9.getNode()) + Ops.push_back(T9); + // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); @@ -2925,8 +2865,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(NextStackOffset, true), + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, true), InFlag); InFlag = Chain.getValue(1); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index edab03c..95ea8fa 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -132,7 +132,6 @@ namespace llvm { // Lower Operand specifics SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 9654b86..df45df4 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -101,18 +101,18 @@ class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>: } // FP indexed load. class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp = null_frag>: FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fd, $index($base)"), + !strconcat(opstr, "\t$fd, ${index}(${base})"), [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> { let fs = 0; } // FP indexed store. class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC, - RegisterClass PRC, PatFrag FOp>: + RegisterClass PRC, SDPatternOperator FOp= null_frag>: FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index), - !strconcat(opstr, "\t$fs, $index($base)"), + !strconcat(opstr, "\t$fs, ${index}(${base})"), [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> { let fd = 0; } @@ -270,7 +270,7 @@ let Predicates = [NotN64, HasStandardEncoding] in { } let Predicates = [NotN64, HasMips64, HasStandardEncoding], - DecoderNamespace = "Mips64" in { + DecoderNamespace = "Mips64" in { def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>; def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>; } @@ -283,9 +283,7 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { // Indexed loads and stores. let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; - def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; - def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; } let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { @@ -301,13 +299,23 @@ let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mip // n64 let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; - def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>; - def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>; def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>; } +// Load/store doubleword indexed unaligned. +let Predicates = [NotMips64, HasStandardEncoding] in { + def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>; + def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>; +} + +let Predicates = [HasMips64, HasStandardEncoding], + DecoderNamespace="Mips64" in { + def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>; + def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>; +} + /// Floating-point Aritmetic defm FADD : FFR2P_M<0x00, "add", fadd, 1>; defm FDIV : FFR2P_M<0x03, "div", fdiv>; @@ -408,25 +416,23 @@ let Defs=[FCR31] in { //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +def MOVCCRToCCR : PseudoSE<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; // This pseudo instr gets expanded into 2 mtc1 instrs after register // allocation. def BuildPairF64 : - MipsPseudo<(outs AFGR64:$dst), - (ins CPURegs:$lo, CPURegs:$hi), "", - [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; + PseudoSE<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. // if n is 0, lower part of src is extracted. // if n is 1, higher part of src is extracted. def ExtractElementF64 : - MipsPseudo<(outs CPURegs:$dst), - (ins AFGR64:$src, i32imm:$n), "", - [(set CPURegs:$dst, - (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; + PseudoSE<(outs CPURegs:$dst), (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -466,17 +472,3 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in { def : MipsPat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } - -// Patterns for unaligned floating point loads and stores. -let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; - def : MipsPat<(store_u FGR32:$src, CPURegs:$addr), - (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; -} - -let Predicates = [IsN64, HasStandardEncoding] in { - def : MipsPat<(f32 (load_u CPU64Regs:$addr)), - (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; - def : MipsPat<(store_u FGR32:$src, CPU64Regs:$addr), - (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; -} diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 15a77fb..8feb853 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -70,25 +70,35 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern, let DecoderNamespace = "Mips"; field bits<32> SoftFail = 0; +} +// Mips32/64 Instruction Format +class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin, Format f>: + MipsInst<outs, ins, asmstr, pattern, itin, f> { let Predicates = [HasStandardEncoding]; - } // Mips Pseudo Instructions Format class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { + MipsInst<outs, ins, asmstr, pattern, IIPseudo, Pseudo> { let isCodeGenOnly = 1; let isPseudo = 1; } +// Mips32/64 Pseudo Instruction Format +class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>: + MipsPseudo<outs, ins, asmstr, pattern> { + let Predicates = [HasStandardEncoding]; +} + //===----------------------------------------------------------------------===// // Format R instruction class in Mips : <|opcode|rs|rt|rd|shamt|funct|> //===----------------------------------------------------------------------===// class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmR> + InstSE<outs, ins, asmstr, pattern, itin, FrmR> { bits<5> rd; bits<5> rs; @@ -111,7 +121,7 @@ class FR<bits<6> op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rt; bits<5> rs; @@ -126,7 +136,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, InstrItinClass itin>: - MipsInst<outs, ins, asmstr, pattern, itin, FrmI> + InstSE<outs, ins, asmstr, pattern, itin, FrmI> { bits<5> rs; bits<5> rt; @@ -144,7 +154,7 @@ class BranchBase<bits<6> op, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, - InstrItinClass itin>: MipsInst<outs, ins, asmstr, pattern, itin, FrmJ> + InstrItinClass itin>: InstSE<outs, ins, asmstr, pattern, itin, FrmJ> { bits<26> addr; @@ -172,7 +182,7 @@ class FJ<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFR> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFR> { bits<5> fd; bits<5> fs; @@ -196,7 +206,7 @@ class FFR<bits<6> op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, //===----------------------------------------------------------------------===// class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmFI> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmFI> { bits<5> ft; bits<5> base; @@ -214,7 +224,7 @@ class FFI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern>: //===----------------------------------------------------------------------===// class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fs; bits<5> ft; @@ -235,7 +245,7 @@ class FCC<bits<5> _fmt, dag outs, dag ins, string asmstr, list<dag> pattern> : class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> rd; bits<5> rs; @@ -256,7 +266,7 @@ class FCMOV<bits<1> _tf, dag outs, dag ins, string asmstr, class FFCMOV<bits<5> _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fs; @@ -303,7 +313,7 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr, // Floating point madd/msub/nmadd/nmsub. class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, list<dag> pattern> - : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { + : InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> fd; bits<5> fr; bits<5> fs; @@ -321,7 +331,7 @@ class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr, // FP indexed load/store instructions. class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr, list<dag> pattern> : - MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> + InstSE<outs, ins, asmstr, pattern, NoItinerary, FrmOther> { bits<5> base; bits<5> index; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 458e4f7..50e3eb5 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -27,68 +27,19 @@ using namespace llvm; -MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm) +MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm, unsigned UncondBr) : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), - TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()), - InMips16Mode(TM.getSubtarget<MipsSubtarget>().inMips16Mode()), - RI(*TM.getSubtargetImpl(), *this), - UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {} + TM(tm), UncondBrOpc(UncondBr) {} -const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const { - return RI; -} +const MipsInstrInfo *MipsInstrInfo::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16InstrInfo(TM); -static bool isZeroImm(const MachineOperand &op) { - return op.isImm() && op.getImm() == 0; + return llvm::createMipsSEInstrInfo(TM); } -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. -unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || - (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || - (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || - (Opc == Mips::LDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - - return 0; -} - -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. -unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const -{ - unsigned Opc = MI->getOpcode(); - - if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || - (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || - (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || - (Opc == Mips::SDC164_P8)) { - if ((MI->getOperand(1).isFI()) && // is a stack slot - (MI->getOperand(2).isImm()) && // the imm is zero - (isZeroImm(MI->getOperand(2)))) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - } - return 0; +bool MipsInstrInfo::isZeroImm(const MachineOperand &op) const { + return op.isImm() && op.getImm() == 0; } /// insertNoop - If data hazard condition is found insert the target nop @@ -100,83 +51,8 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -void MipsInstrInfo:: -copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0, ZeroReg = 0; - - if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. - if (Mips::CPURegsRegClass.contains(SrcReg)) { - if (InMips16Mode) - Opc=Mips::Mov32R16; - else { - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; - } - } - else if (Mips::CCRRegClass.contains(SrcReg)) - Opc = Mips::CFC1; - else if (Mips::FGR32RegClass.contains(SrcReg)) - Opc = Mips::MFC1; - else if (SrcReg == Mips::HI) - Opc = Mips::MFHI, SrcReg = 0; - else if (SrcReg == Mips::LO) - Opc = Mips::MFLO, SrcReg = 0; - } - else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. - if (Mips::CCRRegClass.contains(DestReg)) - Opc = Mips::CTC1; - else if (Mips::FGR32RegClass.contains(DestReg)) - Opc = Mips::MTC1; - else if (DestReg == Mips::HI) - Opc = Mips::MTHI, DestReg = 0; - else if (DestReg == Mips::LO) - Opc = Mips::MTLO, DestReg = 0; - } - else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_S; - else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D32; - else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) - Opc = Mips::FMOV_D64; - else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) - Opc = Mips::MOVCCRToCCR; - else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. - if (Mips::CPU64RegsRegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; - else if (SrcReg == Mips::HI64) - Opc = Mips::MFHI64, SrcReg = 0; - else if (SrcReg == Mips::LO64) - Opc = Mips::MFLO64, SrcReg = 0; - else if (Mips::FGR64RegClass.contains(SrcReg)) - Opc = Mips::DMFC1; - } - else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. - if (DestReg == Mips::HI64) - Opc = Mips::MTHI64, DestReg = 0; - else if (DestReg == Mips::LO64) - Opc = Mips::MTLO64, DestReg = 0; - else if (Mips::FGR64RegClass.contains(DestReg)) - Opc = Mips::DMTC1; - } - - assert(Opc && "Cannot copy registers"); - - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); - - if (DestReg) - MIB.addReg(DestReg, RegState::Define); - - if (ZeroReg) - MIB.addReg(ZeroReg); - - if (SrcReg) - MIB.addReg(SrcReg, getKillRegState(KillSrc)); -} - -static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, - unsigned Flag) { +MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); @@ -185,130 +61,6 @@ static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI, MFI.getObjectSize(FI), Align); } -void MipsInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); - - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SW_P8 : Mips::SW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SD_P8 : Mips::SD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::SDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); -} - -void MipsInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ - DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); - MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); - unsigned Opc = 0; - - if (Mips::CPURegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LW_P8 : Mips::LW; - else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LD_P8 : Mips::LD; - else if (Mips::FGR32RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; - else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) - Opc = Mips::LDC1; - else if (Mips::FGR64RegClass.hasSubClassEq(RC)) - Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; - - assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) - .addMemOperand(MMO); -} - -void MipsInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)) - .addReg(Mips::RA); -} - -void MipsInstrInfo::ExpandRetRA16(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(Opc)); -} - -void MipsInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); - const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); - DebugLoc dl = I->getDebugLoc(); - - assert(N < 2 && "Invalid immediate"); - unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven; - unsigned SubReg = TM.getRegisterInfo()->getSubReg(SrcReg, SubIdx); - - BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); -} - -void MipsInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetInstrInfo *TII = TM.getInstrInfo(); - unsigned DstReg = I->getOperand(0).getReg(); - unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); - const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); - DebugLoc dl = I->getDebugLoc(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - // mtc1 Lo, $fp - // mtc1 Hi, $fp + 1 - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpeven)) - .addReg(LoReg); - BuildMI(MBB, I, dl, Mtc1Tdd, TRI->getSubReg(DstReg, Mips::sub_fpodd)) - .addReg(HiReg); -} - -bool MipsInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - MachineBasicBlock &MBB = *MI->getParent(); - - switch(MI->getDesc().getOpcode()) { - default: - return false; - case Mips::RetRA: - ExpandRetRA(MBB, MI, Mips::RET); - break; - case Mips::RetRA16: - ExpandRetRA16(MBB, MI, Mips::JrRa16); - break; - case Mips::BuildPairF64: - ExpandBuildPairF64(MBB, MI); - break; - case Mips::ExtractElementF64: - ExpandExtractElementF64(MBB, MI); - break; - } - - MBB.erase(MI); - return true; -} - MachineInstr* MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, @@ -322,42 +74,9 @@ MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, // Branch Analysis //===----------------------------------------------------------------------===// -static unsigned GetAnalyzableBrOpc(unsigned Opc) { - return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || - Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || - Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || - Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || - Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || - Opc == Mips::J) ? - Opc : 0; -} - -/// GetOppositeBranchOpc - Return the inverse of the specified -/// opcode, e.g. turning BEQ to BNE. -unsigned Mips::GetOppositeBranchOpc(unsigned Opc) -{ - switch (Opc) { - default: llvm_unreachable("Illegal opcode!"); - case Mips::BEQ: return Mips::BNE; - case Mips::BNE: return Mips::BEQ; - case Mips::BGTZ: return Mips::BLEZ; - case Mips::BGEZ: return Mips::BLTZ; - case Mips::BLTZ: return Mips::BGEZ; - case Mips::BLEZ: return Mips::BGTZ; - case Mips::BEQ64: return Mips::BNE64; - case Mips::BNE64: return Mips::BEQ64; - case Mips::BGTZ64: return Mips::BLEZ64; - case Mips::BGEZ64: return Mips::BLTZ64; - case Mips::BLTZ64: return Mips::BGEZ64; - case Mips::BLEZ64: return Mips::BGTZ64; - case Mips::BC1T: return Mips::BC1F; - case Mips::BC1F: return Mips::BC1T; - } -} - -static void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, - MachineBasicBlock *&BB, - SmallVectorImpl<MachineOperand> &Cond) { +void MipsInstrInfo::AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const { assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); int NumOp = Inst->getNumExplicitOperands(); @@ -527,7 +246,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); + Cond[0].setImm(GetOppositeBranchOpc(Cond[0].getImm())); return false; } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 358f817..7d56259 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -26,99 +26,69 @@ namespace llvm { class MipsInstrInfo : public MipsGenInstrInfo { +protected: MipsTargetMachine &TM; - bool IsN64; bool InMips16Mode; - const MipsRegisterInfo RI; unsigned UncondBrOpc; + public: - explicit MipsInstrInfo(MipsTargetMachine &TM); + explicit MipsInstrInfo(MipsTargetMachine &TM, unsigned UncondBrOpc); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const MipsRegisterInfo &getRegisterInfo() const; - - /// isLoadFromStackSlot - If the specified machine instruction is a direct - /// load from a stack slot, return the virtual or physical register number of - /// the destination along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - /// isStoreToStackSlot - If the specified machine instruction is a direct - /// store to a stack slot, return the virtual or physical register number of - /// the source reg along with the FrameIndex of the loaded stack slot. If - /// not, return 0. This predicate must return 0 if the instruction has - /// any side effects other than storing to the stack slot. - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + static const MipsInstrInfo *create(MipsTargetMachine &TM); /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - -private: - void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; - void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, - const SmallVectorImpl<MachineOperand>& Cond) const; - void ExpandExtractElementF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - void ExpandBuildPairF64(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; -public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *MDPtr, DebugLoc DL) const; - virtual - bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MipsRegisterInfo &getRegisterInfo() const = 0; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const = 0; + /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + +protected: + bool isZeroImm(const MachineOperand &op) const; + + MachineMemOperand *GetMemOperand(MachineBasicBlock &MBB, int FI, + unsigned Flag) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const = 0; + + void AnalyzeCondBr(const MachineInstr *Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl<MachineOperand> &Cond) const; + + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl<MachineOperand>& Cond) const; }; namespace Mips { - /// GetOppositeBranchOpc - Return the inverse of the specified - /// opcode, e.g. turning BEQ to BNE. - unsigned GetOppositeBranchOpc(unsigned Opc); - /// Emit a series of instructions to load an immediate. All instructions /// except for the last one are emitted. The function returns the number of /// MachineInstrs generated. The opcode-immediate pair of the last @@ -130,6 +100,10 @@ namespace Mips { MipsAnalyzeImmediate::Inst *LastInst); } +/// Create MipsInstrInfo objects. +const MipsInstrInfo *createMips16InstrInfo(MipsTargetMachine &TM); +const MipsInstrInfo *createMipsSEInstrInfo(MipsTargetMachine &TM); + } #endif diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f1aada4..fd952ef 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -208,17 +208,24 @@ def uimm16 : Operand<i32> { let PrintMethod = "printUnsignedImm"; } +def MipsMemAsmOperand : AsmOperandClass { + let Name = "Mem"; + let ParserMethod = "parseMemOperand"; +} + // Address operand def mem : Operand<i32> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem64 : Operand<i64> { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemAsmOperand; } def mem_ea : Operand<i32> { @@ -722,9 +729,11 @@ class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC, let neverHasSideEffects = 1; } -class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> : - FMem<0x09, (outs RC:$rt), (ins Mem:$addr), - instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress<bits<6> opc, string instr_asm, RegisterClass RC, Operand Mem> : + FMem<opc, (outs RC:$rt), (ins Mem:$addr), + instr_asm, [(set RC:$rt, addr:$addr)], IIAlu> { + let isCodeGenOnly = 1; +} // Count Leading Ones/Zeros in Word class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>: @@ -803,9 +812,9 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>: // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), - !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), + !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; multiclass Atomic2Ops32<PatFrag Op, string Opstr> { def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, @@ -819,9 +828,9 @@ multiclass Atomic2Ops32<PatFrag Op, string Opstr> { // Atomic Compare & Swap. class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC, RegisterClass PRC> : - MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), - !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), - [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + PseudoSE<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; multiclass AtomicCmpSwap32<PatFrag Op, string Width> { def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, @@ -851,14 +860,13 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Return RA. let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in -def RetRA : MipsPseudo<(outs), (ins), "", [(MipsRet)]>; +def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; -// As stack alignment is always done with addiu, we need a 16-bit immediate -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt), +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), "!ADJCALLSTACKDOWN $amt", [(callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), +def ADJCALLSTACKUP : MipsPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), "!ADJCALLSTACKUP $amt1", [(callseq_end timm:$amt1, timm:$amt2)]>; } @@ -868,8 +876,8 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2), // are used, we have the same behavior, but get also a bunch of warnings // from the assembler. let neverHasSideEffects = 1 in -def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp), - ".cprestore\t$loc", []>; +def CPRESTORE : PseudoSE<(outs), (ins i32imm:$loc, CPURegs:$gp), + ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">; @@ -969,8 +977,8 @@ defm SWL : StoreLeftRightM32<0x2a, "swl", MipsSWL>; defm SWR : StoreLeftRightM32<0x2e, "swr", MipsSWR>; let hasSideEffects = 1 in -def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", - [(MipsSync imm:$stype)], NoItinerary, FrmOther> +def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype", + [(MipsSync imm:$stype)], NoItinerary, FrmOther> { bits<5> stype; let Opcode = 0; @@ -1046,17 +1054,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea> { - let isCodeGenOnly = 1; -} +def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 150bdbb..052046a 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -27,7 +27,52 @@ using namespace llvm; void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + unsigned NewAddr = (intptr_t)New; + unsigned OldAddr = (intptr_t)Old; + const unsigned NopInstr = 0x0; + + // If the functions are in the same memory segment, insert PC-region branch. + if ((NewAddr & 0xF0000000) == ((OldAddr + 4) & 0xF0000000)) { + unsigned *OldInstruction = (unsigned *)Old; + *OldInstruction = 0x08000000; + unsigned JTargetAddr = NewAddr & 0x0FFFFFFC; + + JTargetAddr >>= 2; + *OldInstruction |= JTargetAddr; + + // Insert a NOP. + OldInstruction++; + *OldInstruction = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 2 * 4); + } else { + // We need to clear hint bits from the instruction, in case it is 'jr ra'. + const unsigned HintMask = 0xFFFFF83F, ReturnSequence = 0x03e00008; + unsigned* CurrentInstr = (unsigned*)Old; + unsigned CurrInstrHintClear = (*CurrentInstr) & HintMask; + unsigned* NextInstr = CurrentInstr + 1; + unsigned NextInstrHintClear = (*NextInstr) & HintMask; + + // Do absolute jump if there are 2 or more instructions before return from + // the old function. + if ((CurrInstrHintClear != ReturnSequence) && + (NextInstrHintClear != ReturnSequence)) { + const unsigned LuiT0Instr = 0x3c080000, AddiuT0Instr = 0x25080000; + const unsigned JrT0Instr = 0x01000008; + // lui t0, high 16 bit of the NewAddr + (*(CurrentInstr++)) = LuiT0Instr | ((NewAddr & 0xffff0000) >> 16); + // addiu t0, t0, low 16 bit of the NewAddr + (*(CurrentInstr++)) = AddiuT0Instr | (NewAddr & 0x0000ffff); + // jr t0 + (*(CurrentInstr++)) = JrT0Instr; + (*CurrentInstr) = NopInstr; + + sys::Memory::InvalidateInstructionCache(Old, 4 * 4); + } else { + // Unsupported case + report_fatal_error("MipsJITInfo::replaceMachineCodeForFunction"); + } + } } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 70ecbc1..f78203f 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -207,7 +207,7 @@ int64_t MipsLongBranch::computeOffset(const MachineInstr *Br) { // MachineBasicBlock operand MBBOpnd. void MipsLongBranch::replaceBranch(MachineBasicBlock &MBB, Iter Br, DebugLoc DL, MachineBasicBlock *MBBOpnd) { - unsigned NewOpc = Mips::GetOppositeBranchOpc(Br->getOpcode()); + unsigned NewOpc = TII->GetOppositeBranchOpc(Br->getOpcode()); const MCInstrDesc &NewDesc = TII->get(NewOpc); MachineInstrBuilder MIB = BuildMI(MBB, Br, DL, NewDesc); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index b2232c6..df3c4c0 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -48,8 +48,6 @@ class MipsFunctionInfo : public MachineFunctionInfo { // OutArgFIRange: Range of indices of all frame objects created during call to // LowerCall except for the frame object for restoring $gp. std::pair<int, int> InArgFIRange, OutArgFIRange; - int GlobalRegFI; - mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; bool EmitNOAT; @@ -58,8 +56,7 @@ public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), - OutArgFIRange(std::make_pair(-1, 0)), GlobalRegFI(0), DynAllocFI(0), - MaxCallFrameSize(0), EmitNOAT(false) + OutArgFIRange(std::make_pair(-1, 0)), MaxCallFrameSize(0), EmitNOAT(false) {} bool isInArgFI(int FI) const { @@ -77,34 +74,6 @@ public: OutArgFIRange.second = LastFI; } - bool isGlobalRegFI(int FI) const { - return GlobalRegFI && (FI == GlobalRegFI); - } - - int getGlobalRegFI() const { - return GlobalRegFI; - } - - int initGlobalRegFI() { - const TargetMachine &TM = MF.getTarget(); - unsigned RegSize = TM.getSubtarget<MipsSubtarget>().isABI_N64() ? 8 : 4; - int64_t StackAlignment = TM.getFrameLowering()->getStackAlignment(); - uint64_t Offset = RoundUpToAlignment(MaxCallFrameSize, StackAlignment); - - GlobalRegFI = MF.getFrameInfo()->CreateFixedObject(RegSize, Offset, true); - return GlobalRegFI; - } - - // The first call to this function creates a frame object for dynamically - // allocated stack area. - int getDynAllocFI() const { - if (!DynAllocFI) - DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true); - - return DynAllocFI; - } - bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; } - unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index a3ce236..ae6ae3a 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -144,15 +144,6 @@ MipsRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { return true; } -// This function eliminate ADJCALLSTACKDOWN, -// ADJCALLSTACKUP pseudo instructions -void MipsRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. - MBB.erase(I); -} - // FrameIndex represent objects inside a abstract stack. // We must replace FrameIndex with an stack/frame pointer // direct reference. @@ -161,8 +152,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned i = 0; while (!MI.getOperand(i).isFI()) { @@ -182,68 +171,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, << "spOffset : " << spOffset << "\n" << "stackSize : " << stackSize << "\n"); - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); - } - - // The following stack frame objects are always referenced relative to $sp: - // 1. Outgoing arguments. - // 2. Pointer to dynamically allocated stack space. - // 3. Locations for callee-saved registers. - // Everything else is referenced relative to whatever register - // getFrameRegister() returns. - unsigned FrameReg; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) - FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; - else - FrameReg = getFrameRegister(MF); - - // Calculate final offset. - // - There is no need to change the offset if the frame object is one of the - // following: an outgoing argument, pointer to a dynamically allocated - // stack space or a $gp restore location, - // - If the frame object is any of the following, its offset must be adjusted - // by adding the size of the stack: - // incoming argument, callee-saved register location or local variable. - int64_t Offset; - - if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) || - MipsFI->isGlobalRegFI(FrameIndex)) - Offset = spOffset; - else - Offset = spOffset + (int64_t)stackSize; - - Offset += MI.getOperand(i+1).getImm(); - - DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - - // If MI is not a debug value, make sure Offset fits in the 16-bit immediate - // field. - if (!MI.isDebugValue() && !isInt<16>(Offset)) { - MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = II->getDebugLoc(); - unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; - MipsAnalyzeImmediate::Inst LastInst(0, 0); - - MipsFI->setEmitNOAT(); - Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, - &LastInst); - BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); - - FrameReg = ATReg; - Offset = SignExtend64<16>(LastInst.ImmOpnd); - } - - MI.getOperand(i).ChangeToRegister(FrameReg, false); - MI.getOperand(i+1).ChangeToImmediate(Offset); + eliminateFI(MI, i, FrameIndex, stackSize, spOffset); } unsigned MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index f320bae..9a05e94 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -25,10 +25,12 @@ class MipsSubtarget; class TargetInstrInfo; class Type; -struct MipsRegisterInfo : public MipsGenRegisterInfo { +class MipsRegisterInfo : public MipsGenRegisterInfo { +protected: const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; +public: MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. @@ -51,10 +53,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS = NULL) const; @@ -67,6 +65,11 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Exception handling queries. unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const = 0; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index b255e42..4015add 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -239,6 +239,9 @@ let Namespace = "Mips" in { // fcc0 register def FCC0 : Register<"fcc0">; + // PC register + def PC : Register<"pc">; + // Hardware register $29 def HWR29 : Register<"29">; def HWR29_64 : Register<"29">; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp new file mode 100644 index 0000000..1c59847 --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -0,0 +1,210 @@ +//===-- MipsSEFrameLowering.cpp - Mips32/64 Frame Information -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSEFrameLowering.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsRegisterInfo *RegInfo = + static_cast<const MipsRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // First, compute final stack size. + uint64_t StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->adjustsStack()) return; + + MachineModuleInfo &MMI = MF.getMMI(); + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + MachineLocation DstML, SrcML; + + // Adjust stack. + TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + + // emit ".cfi_def_cfa_offset StackSize" + MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(AdjustSPLabel); + DstML = MachineLocation(MachineLocation::VirtualFP); + SrcML = MachineLocation(MachineLocation::VirtualFP, -StackSize); + Moves.push_back(MachineMove(AdjustSPLabel, DstML, SrcML)); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + if (CSI.size()) { + // Find the instruction past the last instruction that saves a callee-saved + // register to the stack. + for (unsigned i = 0; i < CSI.size(); ++i) + ++MBBI; + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel); + + for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), + E = CSI.end(); I != E; ++I) { + int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = I->getReg(); + + // If Reg is a double precision register, emit two cfa_offsets, + // one for each of the paired single precision registers. + if (Mips::AFGR64RegClass.contains(Reg)) { + MachineLocation DstML0(MachineLocation::VirtualFP, Offset); + MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4); + MachineLocation SrcML0(RegInfo->getSubReg(Reg, Mips::sub_fpeven)); + MachineLocation SrcML1(RegInfo->getSubReg(Reg, Mips::sub_fpodd)); + + if (!STI.isLittle()) + std::swap(SrcML0, SrcML1); + + Moves.push_back(MachineMove(CSLabel, DstML0, SrcML0)); + Moves.push_back(MachineMove(CSLabel, DstML1, SrcML1)); + } else { + // Reg is either in CPURegs or FGR32. + DstML = MachineLocation(MachineLocation::VirtualFP, Offset); + SrcML = MachineLocation(Reg); + Moves.push_back(MachineMove(CSLabel, DstML, SrcML)); + } + } + } + + // if framepointer enabled, set it to point to the stack pointer. + if (hasFP(MF)) { + // Insert instruction "move $fp, $sp" at this location. + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); + + // emit ".cfi_def_cfa_register $fp" + MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel); + DstML = MachineLocation(FP); + SrcML = MachineLocation(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML)); + } +} + +void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MipsSEInstrInfo &TII = + *static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo()); + DebugLoc dl = MBBI->getDebugLoc(); + unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + + // if framepointer enabled, restore the stack pointer. + if (hasFP(MF)) { + // Find the first instruction that restores a callee-saved register. + MachineBasicBlock::iterator I = MBBI; + + for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i) + --I; + + // Insert instruction "move $sp, $fp" at this location. + BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + } + + // Get the number of bytes from FrameInfo + uint64_t StackSize = MFI->getStackSize(); + + if (!StackSize) + return; + + // Adjust stack. + TII.adjustStackPtr(SP, StackSize, MBB, MBBI); +} + +bool MipsSEFrameLowering:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction *MF = MBB.getParent(); + MachineBasicBlock *EntryBlock = MF->begin(); + const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. Do not add if the register is + // RA and return address is taken, because it has already been added in + // method MipsTargetLowering::LowerRETURNADDR. + // It's killed at the spill, unless the register is RA and return address + // is taken. + unsigned Reg = CSI[i].getReg(); + bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64) + && MF->getFrameInfo()->isReturnAddressTaken(); + if (!IsRAAndRetAddrIsTaken) + EntryBlock->addLiveIn(Reg); + + // Insert the spill to the stack frame. + bool IsKill = !IsRAAndRetAddrIsTaken; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, MI, Reg, IsKill, + CSI[i].getFrameIdx(), RC, TRI); + } + + return true; +} + +bool +MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Reserve call frame if the size of the maximum call frame fits into 16-bit + // immediate field and there are no variable sized objects on the stack. + return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); +} + +void MipsSEFrameLowering:: +processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP; + + // Mark $fp as used if function has dedicated frame pointer. + if (hasFP(MF)) + MRI.setPhysRegUsed(FP); +} + +const MipsFrameLowering * +llvm::createMipsSEFrameLowering(const MipsSubtarget &ST) { + return new MipsSEFrameLowering(ST); +} diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h new file mode 100644 index 0000000..6481a0a --- /dev/null +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -0,0 +1,44 @@ +//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSE_FRAMEINFO_H +#define MIPSSE_FRAMEINFO_H + +#include "MipsFrameLowering.h" + +namespace llvm { + +class MipsSEFrameLowering : public MipsFrameLowering { +public: + explicit MipsSEFrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI) {} + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const; + + bool hasReservedCallFrame(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const; +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp new file mode 100644 index 0000000..eeb1de3 --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -0,0 +1,320 @@ +//===-- MipsSEInstrInfo.cpp - Mips32/64 Instruction Information -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSEInstrInfo.h" +#include "MipsTargetMachine.h" +#include "MipsMachineFunction.h" +#include "InstPrinter/MipsInstPrinter.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSEInstrInfo::MipsSEInstrInfo(MipsTargetMachine &tm) + : MipsInstrInfo(tm, + tm.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J), + RI(*tm.getSubtargetImpl(), *this), + IsN64(tm.getSubtarget<MipsSubtarget>().isABI_N64()) {} + +const MipsRegisterInfo &MipsSEInstrInfo::getRegisterInfo() const { + return RI; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MipsSEInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::LW) || (Opc == Mips::LW_P8) || (Opc == Mips::LD) || + (Opc == Mips::LD_P8) || (Opc == Mips::LWC1) || (Opc == Mips::LWC1_P8) || + (Opc == Mips::LDC1) || (Opc == Mips::LDC164) || + (Opc == Mips::LDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MipsSEInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +{ + unsigned Opc = MI->getOpcode(); + + if ((Opc == Mips::SW) || (Opc == Mips::SW_P8) || (Opc == Mips::SD) || + (Opc == Mips::SD_P8) || (Opc == Mips::SWC1) || (Opc == Mips::SWC1_P8) || + (Opc == Mips::SDC1) || (Opc == Mips::SDC164) || + (Opc == Mips::SDC164_P8)) { + if ((MI->getOperand(1).isFI()) && // is a stack slot + (MI->getOperand(2).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(2)))) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc = 0, ZeroReg = 0; + + if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg. + if (Mips::CPURegsRegClass.contains(SrcReg)) + Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + else if (Mips::CCRRegClass.contains(SrcReg)) + Opc = Mips::CFC1; + else if (Mips::FGR32RegClass.contains(SrcReg)) + Opc = Mips::MFC1; + else if (SrcReg == Mips::HI) + Opc = Mips::MFHI, SrcReg = 0; + else if (SrcReg == Mips::LO) + Opc = Mips::MFLO, SrcReg = 0; + } + else if (Mips::CPURegsRegClass.contains(SrcReg)) { // Copy from CPU Reg. + if (Mips::CCRRegClass.contains(DestReg)) + Opc = Mips::CTC1; + else if (Mips::FGR32RegClass.contains(DestReg)) + Opc = Mips::MTC1; + else if (DestReg == Mips::HI) + Opc = Mips::MTHI, DestReg = 0; + else if (DestReg == Mips::LO) + Opc = Mips::MTLO, DestReg = 0; + } + else if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_S; + else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D32; + else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D64; + else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) + Opc = Mips::MOVCCRToCCR; + else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. + if (Mips::CPU64RegsRegClass.contains(SrcReg)) + Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + else if (SrcReg == Mips::HI64) + Opc = Mips::MFHI64, SrcReg = 0; + else if (SrcReg == Mips::LO64) + Opc = Mips::MFLO64, SrcReg = 0; + else if (Mips::FGR64RegClass.contains(SrcReg)) + Opc = Mips::DMFC1; + } + else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. + if (DestReg == Mips::HI64) + Opc = Mips::MTHI64, DestReg = 0; + else if (DestReg == Mips::LO64) + Opc = Mips::MTLO64, DestReg = 0; + else if (Mips::FGR64RegClass.contains(DestReg)) + Opc = Mips::DMTC1; + } + + assert(Opc && "Cannot copy registers"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc)); + + if (DestReg) + MIB.addReg(DestReg, RegState::Define); + + if (ZeroReg) + MIB.addReg(ZeroReg); + + if (SrcReg) + MIB.addReg(SrcReg, getKillRegState(KillSrc)); +} + +void MipsSEInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); + + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SW_P8 : Mips::SW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::SDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::SDC164_P8 : Mips::SDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); +} + +void MipsSEInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); + unsigned Opc = 0; + + if (Mips::CPURegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LW_P8 : Mips::LW; + else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::FGR32RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; + else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) + Opc = Mips::LDC1; + else if (Mips::FGR64RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; + + assert(Opc && "Register class not handled!"); + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); +} + +bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineBasicBlock &MBB = *MI->getParent(); + + switch(MI->getDesc().getOpcode()) { + default: + return false; + case Mips::RetRA: + ExpandRetRA(MBB, MI, Mips::RET); + break; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, MI); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, MI); + break; + } + + MBB.erase(MI); + return true; +} + +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. +unsigned MipsSEInstrInfo::GetOppositeBranchOpc(unsigned Opc) const { + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ: return Mips::BNE; + case Mips::BNE: return Mips::BEQ; + case Mips::BGTZ: return Mips::BLEZ; + case Mips::BGEZ: return Mips::BLTZ; + case Mips::BLTZ: return Mips::BGEZ; + case Mips::BLEZ: return Mips::BGTZ; + case Mips::BEQ64: return Mips::BNE64; + case Mips::BNE64: return Mips::BEQ64; + case Mips::BGTZ64: return Mips::BLEZ64; + case Mips::BGEZ64: return Mips::BLTZ64; + case Mips::BLTZ64: return Mips::BGEZ64; + case Mips::BLEZ64: return Mips::BGTZ64; + case Mips::BC1T: return Mips::BC1F; + case Mips::BC1F: return Mips::BC1T; + } +} + +/// Adjust SP by Amount bytes. +void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + + if (isInt<16>(Amount))// addi sp, sp, amount + BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); + else { // Expand immediate that doesn't fit in 16-bit. + unsigned ATReg = STI.isABI_N64() ? Mips::AT_64 : Mips::AT; + + MBB.getParent()->getInfo<MipsFunctionInfo>()->setEmitNOAT(); + Mips::loadImmediate(Amount, STI.isABI_N64(), *this, MBB, I, DL, false, 0); + BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(ATReg); + } +} + +unsigned MipsSEInstrInfo::GetAnalyzableBrOpc(unsigned Opc) const { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || + Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B || + Opc == Mips::J) ? + Opc : 0; +} + +void MipsSEInstrInfo::ExpandRetRA(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned Opc) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); +} + +void MipsSEInstrInfo::ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const MCInstrDesc& Mfc1Tdd = get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + + assert(N < 2 && "Invalid immediate"); + unsigned SubIdx = N ? Mips::sub_fpodd : Mips::sub_fpeven; + unsigned SubReg = getRegisterInfo().getSubReg(SrcReg, SubIdx); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(SubReg); +} + +void MipsSEInstrInfo::ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const TargetRegisterInfo &TRI = getRegisterInfo(); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpeven)) + .addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_fpodd)) + .addReg(HiReg); +} + +const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { + return new MipsSEInstrInfo(TM); +} diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h new file mode 100644 index 0000000..346e74d --- /dev/null +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -0,0 +1,86 @@ +//===-- MipsSEInstrInfo.h - Mips32/64 Instruction Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEINSTRUCTIONINFO_H +#define MIPSSEINSTRUCTIONINFO_H + +#include "MipsInstrInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSERegisterInfo.h" + +namespace llvm { + +class MipsSEInstrInfo : public MipsInstrInfo { + const MipsSERegisterInfo RI; + bool IsN64; + +public: + explicit MipsSEInstrInfo(MipsTargetMachine &TM); + + virtual const MipsRegisterInfo &getRegisterInfo() const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + + virtual unsigned GetOppositeBranchOpc(unsigned Opc) const; + + /// Adjust SP by Amount bytes. + void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual unsigned GetAnalyzableBrOpc(unsigned Opc) const; + + void ExpandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned Opc) const; + void ExpandExtractElementF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + void ExpandBuildPairF64(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp new file mode 100644 index 0000000..043a1ef --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -0,0 +1,138 @@ +//===-- MipsSERegisterInfo.cpp - MIPS32/64 Register Information -== -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MIPS32/64 implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#include "MipsSERegisterInfo.h" +#include "Mips.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsSEInstrInfo.h" +#include "MipsSubtarget.h" +#include "MipsMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DebugInfo.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST, + const TargetInstrInfo &TII) + : MipsRegisterInfo(ST, TII) {} + +// This function eliminate ADJCALLSTACKDOWN, +// ADJCALLSTACKUP pseudo instructions +void MipsSERegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + if (!TFI->hasReservedCallFrame(MF)) { + int64_t Amount = I->getOperand(0).getImm(); + + if (I->getOpcode() == Mips::ADJCALLSTACKDOWN) + Amount = -Amount; + + const MipsSEInstrInfo *II = static_cast<const MipsSEInstrInfo*>(&TII); + unsigned SP = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + + II->adjustStackPtr(SP, Amount, MBB, I); + } + + MBB.erase(I); +} + +void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, + unsigned OpNo, int FrameIndex, + uint64_t StackSize, + int64_t SPOffset) const { + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + // The following stack frame objects are always referenced relative to $sp: + // 1. Outgoing arguments. + // 2. Pointer to dynamically allocated stack space. + // 3. Locations for callee-saved registers. + // Everything else is referenced relative to whatever register + // getFrameRegister() returns. + unsigned FrameReg; + + if (MipsFI->isOutArgFI(FrameIndex) || + (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)) + FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP; + else + FrameReg = getFrameRegister(MF); + + // Calculate final offset. + // - There is no need to change the offset if the frame object is one of the + // following: an outgoing argument, pointer to a dynamically allocated + // stack space or a $gp restore location, + // - If the frame object is any of the following, its offset must be adjusted + // by adding the size of the stack: + // incoming argument, callee-saved register location or local variable. + int64_t Offset; + + if (MipsFI->isOutArgFI(FrameIndex)) + Offset = SPOffset; + else + Offset = SPOffset + (int64_t)StackSize; + + Offset += MI.getOperand(OpNo + 1).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + // If MI is not a debug value, make sure Offset fits in the 16-bit immediate + // field. + if (!MI.isDebugValue() && !isInt<16>(Offset)) { + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = II->getDebugLoc(); + unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT; + MipsAnalyzeImmediate::Inst LastInst(0, 0); + + MipsFI->setEmitNOAT(); + Mips::loadImmediate(Offset, Subtarget.isABI_N64(), TII, MBB, II, DL, true, + &LastInst); + BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg); + + FrameReg = ATReg; + Offset = SignExtend64<16>(LastInst.ImmOpnd); + } + + MI.getOperand(OpNo).ChangeToRegister(FrameReg, false); + MI.getOperand(OpNo + 1).ChangeToImmediate(Offset); +} diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h new file mode 100644 index 0000000..4b17b33 --- /dev/null +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -0,0 +1,39 @@ +//===-- MipsSERegisterInfo.h - Mips32/64 Register Information ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Mips32/64 implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEREGISTERINFO_H +#define MIPSSEREGISTERINFO_H + +#include "MipsRegisterInfo.h" + +namespace llvm { + +class MipsSERegisterInfo : public MipsRegisterInfo { +public: + MipsSERegisterInfo(const MipsSubtarget &Subtarget, + const TargetInstrInfo &TII); + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + +private: + virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 3215c44..ba15362 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -89,6 +89,9 @@ protected: // InMips16 -- can process Mips16 instructions bool InMips16Mode; + // IsAndroid -- target is android + bool IsAndroid; + InstrItineraryData InstrItins; public: @@ -128,6 +131,7 @@ public: bool isNotSingleFloat() const { return !IsSingleFloat; } bool hasVFPU() const { return HasVFPU; } bool inMips16Mode() const { return InMips16Mode; } + bool isAndroid() const { return IsAndroid; } bool isLinux() const { return IsLinux; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index dd5d35f..2928a73 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -13,6 +13,8 @@ #include "MipsTargetMachine.h" #include "Mips.h" +#include "MipsFrameLowering.h" +#include "MipsInstrInfo.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/TargetRegistry.h" @@ -22,8 +24,8 @@ extern "C" void LLVMInitializeMipsTarget() { // Register the target. RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget); RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget); - RegisterTargetMachine<Mips64ebTargetMachine> A(TheMips64Target); - RegisterTargetMachine<Mips64elTargetMachine> B(TheMips64elTarget); + RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target); + RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } // DataLayout --> Big-endian, 32-bit pointer/ABI/alignment @@ -48,9 +50,10 @@ MipsTargetMachine(const Target &T, StringRef TT, (Subtarget.isABI_N64() ? "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" : "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")), - InstrInfo(*this), - FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), JITInfo() { + InstrInfo(MipsInstrInfo::create(*this)), + FrameLowering(MipsFrameLowering::create(*this, Subtarget)), + TLInfo(*this), TSInfo(*this), JITInfo(), + ELFWriterInfo(false, isLittle) { } void MipsebTargetMachine::anchor() { } @@ -71,24 +74,6 @@ MipselTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL) : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void Mips64ebTargetMachine::anchor() { } - -Mips64ebTargetMachine:: -Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void Mips64elTargetMachine::anchor() { } - -Mips64elTargetMachine:: -Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - namespace { /// Mips Code Generator Pass Configuration Options. class MipsPassConfig : public TargetPassConfig { diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 5cbf057..a542ef6 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,59 +20,67 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" +#include "MipsELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class formatted_raw_ostream; - - class MipsTargetMachine : public LLVMTargetMachine { - MipsSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - MipsInstrInfo InstrInfo; - MipsFrameLowering FrameLowering; - MipsTargetLowering TLInfo; - MipsSelectionDAGInfo TSInfo; - MipsJITInfo JITInfo; - - public: - MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - virtual const MipsInstrInfo *getInstrInfo() const - { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const - { return &FrameLowering; } - virtual const MipsSubtarget *getSubtargetImpl() const - { return &Subtarget; } - virtual const TargetData *getTargetData() const - { return &DataLayout;} - virtual MipsJITInfo *getJITInfo() - { return &JITInfo; } - - - virtual const MipsRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); - }; - -/// MipsebTargetMachine - Mips32 big endian target machine. +class formatted_raw_ostream; +class MipsRegisterInfo; + +class MipsTargetMachine : public LLVMTargetMachine { + MipsSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + const MipsInstrInfo *InstrInfo; + const MipsFrameLowering *FrameLowering; + MipsTargetLowering TLInfo; + MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + MipsELFWriterInfo ELFWriterInfo; + +public: + MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, + bool isLittle); + + virtual ~MipsTargetMachine() { delete InstrInfo; } + + virtual const MipsInstrInfo *getInstrInfo() const + { return InstrInfo; } + virtual const TargetFrameLowering *getFrameLowering() const + { return FrameLowering; } + virtual const MipsSubtarget *getSubtargetImpl() const + { return &Subtarget; } + virtual const TargetData *getTargetData() const + { return &DataLayout;} + virtual MipsJITInfo *getJITInfo() + { return &JITInfo; } + + virtual const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; + } + + virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + + virtual const MipsELFWriterInfo *getELFWriterInfo() const { + return &ELFWriterInfo; + } + + // Pass Pipeline Configuration + virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); +}; + +/// MipsebTargetMachine - Mips32/64 big endian target machine. /// class MipsebTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -83,7 +91,7 @@ public: CodeGenOpt::Level OL); }; -/// MipselTargetMachine - Mips32 little endian target machine. +/// MipselTargetMachine - Mips32/64 little endian target machine. /// class MipselTargetMachine : public MipsTargetMachine { virtual void anchor(); @@ -94,29 +102,6 @@ public: CodeGenOpt::Level OL); }; -/// Mips64ebTargetMachine - Mips64 big endian target machine. -/// -class Mips64ebTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64ebTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Mips64elTargetMachine - Mips64 little endian target machine. -/// -class Mips64elTargetMachine : public MipsTargetMachine { - virtual void anchor(); -public: - Mips64elTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; } // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index f50f9b5..2a2abb1 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -337,7 +337,10 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, // can get a useful trip count. The trip count can // be either a register or an immediate. The location // of the value depends upon the type (reg or imm). - while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) { + for (MachineRegisterInfo::reg_iterator + RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); + RI != RE; ++RI) { + IV_Opnd = &RI.getOperand(); bool SignedCmp; MachineInstr *MI = IV_Opnd->getParent(); if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13250b3..61d44c5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -106,7 +106,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // from FP_ROUND: that rounds to nearest, this rounds to zero. setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); - // We do not currently implment this libm ops for PowerPC. + // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); @@ -394,8 +394,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - if (Subtarget->has64BitSupport()) + if (Subtarget->has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + } setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 91c5366..39778a5 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -265,6 +265,15 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), PPC970_DGroup_First, PPC970_Unit_FXU; } +let Pattern = [(set G8RC:$rT, readcyclecounter)] in +def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), + "mfspr $rT, 268", SprMFTB>, + PPC970_DGroup_First, PPC970_Unit_FXU; +// Note that encoding mftb using mfspr is now the preferred form, +// and has been since at least ISA v2.03. The mftb instruction has +// now been phased out. Using mfspr, however, is known not to work on +// the POWER3. + let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"", [(set G8RC:$result, diff --git a/lib/Target/PowerPC/TargetInfo/Makefile b/lib/Target/PowerPC/TargetInfo/Makefile index a101aa4..2d0560d 100644 --- a/lib/Target/PowerPC/TargetInfo/Makefile +++ b/lib/Target/PowerPC/TargetInfo/Makefile @@ -10,6 +10,6 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMPowerPCInfo # Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +override CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/README.txt b/lib/Target/README.txt index cbfa4cf..9c27f27 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2367,8 +2367,3 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; } should fold to x > y. //===---------------------------------------------------------------------===// - -int f(double x) { return __builtin_fabs(x) < 0.0; } -should fold to false. - -//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 6357468..ff8d3c5 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -109,9 +109,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } -void SparcRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const {} - unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index ec95ad4..8e215a7 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -24,64 +24,72 @@ void TargetLibraryInfo::anchor() { } const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = { + "__cxa_atexit", + "__cxa_guard_abort", + "__cxa_guard_acquire", + "__cxa_guard_release", + "__memcpy_chk", "acos", - "acosl", "acosf", + "acosl", "asin", - "asinl", "asinf", + "asinl", "atan", - "atanl", - "atanf", "atan2", - "atan2l", "atan2f", + "atan2l", + "atanf", + "atanl", "ceil", - "ceill", "ceilf", + "ceill", "copysign", "copysignf", "copysignl", "cos", - "cosl", "cosf", "cosh", - "coshl", "coshf", + "coshl", + "cosl", "exp", - "expl", - "expf", "exp2", - "exp2l", "exp2f", + "exp2l", + "expf", + "expl", "expm1", - "expm1l", "expm1f", + "expm1l", "fabs", - "fabsl", "fabsf", + "fabsl", + "fiprintf", "floor", - "floorl", "floorf", - "fiprintf", + "floorl", "fmod", - "fmodl", "fmodf", + "fmodl", + "fputc", "fputs", "fwrite", "iprintf", "log", - "logl", - "logf", - "log2", - "log2l", - "log2f", "log10", - "log10l", "log10f", + "log10l", "log1p", - "log1pl", "log1pf", + "log1pl", + "log2", + "log2f", + "log2l", + "logf", + "logl", + "memchr", + "memcmp", "memcpy", "memmove", "memset", @@ -92,6 +100,8 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "pow", "powf", "powl", + "putchar", + "puts", "rint", "rintf", "rintl", @@ -99,36 +109,48 @@ const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] = "roundf", "roundl", "sin", - "sinl", "sinf", "sinh", - "sinhl", "sinhf", + "sinhl", + "sinl", "siprintf", "sqrt", - "sqrtl", "sqrtf", + "sqrtl", + "strcat", + "strchr", + "strcpy", + "strlen", + "strncat", + "strncmp", + "strncpy", + "strnlen", "tan", - "tanl", "tanf", "tanh", - "tanhl", "tanhf", + "tanhl", + "tanl", "trunc", "truncf", - "truncl", - "__cxa_atexit", - "__cxa_guard_abort", - "__cxa_guard_acquire", - "__cxa_guard_release" + "truncl" }; /// initialize - Initialize the set of available library functions based on the /// specified target triple. This should be carefully written so that a missing /// target triple gets a sane set of defaults. -static void initialize(TargetLibraryInfo &TLI, const Triple &T) { +static void initialize(TargetLibraryInfo &TLI, const Triple &T, + const char **StandardNames) { initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry()); +#ifndef NDEBUG + // Verify that the StandardNames array is in alphabetical order. + for (unsigned F = 1; F < LibFunc::NumLibFuncs; ++F) { + if (strcmp(StandardNames[F-1], StandardNames[F]) >= 0) + llvm_unreachable("TargetLibraryInfo function names must be sorted"); + } +#endif // !NDEBUG // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. if (T.isMacOSX()) { @@ -240,14 +262,14 @@ TargetLibraryInfo::TargetLibraryInfo() : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, Triple()); + initialize(*this, Triple(), StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) { // Default to everything being available. memset(AvailableArray, -1, sizeof(AvailableArray)); - initialize(*this, T); + initialize(*this, T, StandardNames); } TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) @@ -256,6 +278,17 @@ TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI) CustomNames = TLI.CustomNames; } +bool TargetLibraryInfo::getLibFunc(StringRef funcName, + LibFunc::Func &F) const { + const char **Start = &StandardNames[0]; + const char **End = &StandardNames[LibFunc::NumLibFuncs]; + const char **I = std::lower_bound(Start, End, funcName); + if (I != End && *I == funcName) { + F = (LibFunc::Func)(I - Start); + return true; + } + return false; +} /// disableAllFunctions - This disables all builtins, which is used for options /// like -fno-builtin. diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 95e83ec..73a0095 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -39,7 +39,9 @@ private: MCAsmLexer &getLexer() const { return Parser.getLexer(); } bool Error(SMLoc L, const Twine &Msg, - ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) { + ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(), + bool matchingInlineAsm = false) { + if (matchingInlineAsm) return true; return Parser.Error(L, Msg, Ranges); } @@ -65,6 +67,12 @@ private: SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out); + bool MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, + unsigned &OrigErrorInfo, + bool matchingInlineAsm = false); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode. bool isSrcOp(X86Operand &Op); @@ -1508,9 +1516,24 @@ bool X86AsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out) { + SmallVector<MCInst, 2> Insts; + unsigned ErrorInfo; + bool Error = MatchInstruction(IDLoc, Operands, Insts, ErrorInfo); + if (!Error) + for (unsigned i = 0, e = Insts.size(); i != e; ++i) + Out.EmitInstruction(Insts[i]); + return Error; +} + +bool X86AsmParser:: +MatchInstruction(SMLoc IDLoc, + SmallVectorImpl<MCParsedAsmOperand*> &Operands, + SmallVectorImpl<MCInst> &MCInsts, unsigned &OrigErrorInfo, + bool matchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); X86Operand *Op = static_cast<X86Operand*>(Operands[0]); assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>(); // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. @@ -1523,7 +1546,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); const char *Repl = StringSwitch<const char*>(Op->getToken()) @@ -1542,7 +1565,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, } bool WasOriginallyInvalidOperand = false; - unsigned OrigErrorInfo; MCInst Inst; // First, try a direct match. @@ -1557,13 +1579,15 @@ MatchAndEmitInstruction(SMLoc IDLoc, ; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; case Match_ConversionFail: - return Error(IDLoc, "unable to convert operands to instruction"); + return Error(IDLoc, "unable to convert operands to instruction", + EmptyRanges, matchingInlineAsm); case Match_InvalidOperand: WasOriginallyInvalidOperand = true; break; @@ -1615,7 +1639,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_Success) + (Match4 == Match_Success); if (NumSuccessfulMatches == 1) { Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); + MCInsts.push_back(Inst); return false; } @@ -1642,7 +1666,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, OS << "'" << Base << MatchChars[i] << "'"; } OS << ")"; - Error(IDLoc, OS.str()); + Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm); return true; } @@ -1654,30 +1678,33 @@ MatchAndEmitInstruction(SMLoc IDLoc, (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", - Op->getLocRange()); + Op->getLocRange(), matchingInlineAsm); } // Recover location info for the operand if we know which was the problem. if (OrigErrorInfo != ~0U) { if (OrigErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); + return Error(IDLoc, "too few operands for instruction", + EmptyRanges, matchingInlineAsm); X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo]; if (Operand->getStartLoc().isValid()) { SMRange OperandRange = Operand->getLocRange(); return Error(Operand->getStartLoc(), "invalid operand for instruction", - OperandRange); + OperandRange, matchingInlineAsm); } } - return Error(IDLoc, "invalid operand for instruction"); + return Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); } // If one instruction matched with a missing feature, report this as a // missing feature. if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) + (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){ - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + Error(IDLoc, "instruction requires a CPU feature not currently enabled", + EmptyRanges, matchingInlineAsm); return true; } @@ -1685,12 +1712,14 @@ MatchAndEmitInstruction(SMLoc IDLoc, // operand failure. if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) + (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){ - Error(IDLoc, "invalid operand for instruction"); + Error(IDLoc, "invalid operand for instruction", EmptyRanges, + matchingInlineAsm); return true; } // If all of these were an outright failure, report it in a useless way. - Error(IDLoc, "unknown use of instruction mnemonic without a size suffix"); + Error(IDLoc, "unknown use of instruction mnemonic without a size suffix", + EmptyRanges, matchingInlineAsm); return true; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4bbfe95..5039887 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -327,7 +327,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, if (type == TYPE_RELv) { isBranch = true; pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; + insn.immediateOffset + insn.immediateSize; switch (insn.displacementSize) { default: break; @@ -762,8 +762,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.vvvv); return false; case ENCODING_DUP: - return translateOperand(mcInst, - insn.spec->operands[operand.type - TYPE_DUP0], + return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], insn, Dis); } } @@ -789,8 +788,8 @@ static bool translateInstruction(MCInst &mcInst, insn.numImmediatesTranslated = 0; for (index = 0; index < X86_MAX_OPERANDS; ++index) { - if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { + if (insn.operands[index].encoding != ENCODING_NONE) { + if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { return true; } } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index c11f51c..0dbfa26 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -20,7 +20,7 @@ // 2. Read the opcode, and determine what kind of opcode it is. The // disassembler distinguishes four kinds of opcodes, which are enumerated in // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte -// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +// (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. // // 3. Depending on the opcode type, look in one of four ClassDecision structures @@ -74,8 +74,8 @@ #ifndef X86DISASSEMBLER_H #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; @@ -88,7 +88,7 @@ #include "llvm/MC/MCDisassembler.h" namespace llvm { - + class MCInst; class MCInstrInfo; class MCSubtargetInfo; @@ -96,7 +96,7 @@ class MemoryObject; class raw_ostream; struct EDInstInfo; - + namespace X86Disassembler { /// X86GenericDisassembler - Generic disassembler for all X86 platforms. diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 6020877..0c92912 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1495,14 +1495,14 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { - switch (insn->spec->operands[index].encoding) { + switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: break; case ENCODING_REG: case ENCODING_RM: if (readModRM(insn)) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_CB: @@ -1524,14 +1524,14 @@ static int readOperands(struct InternalInstruction* insn) { } if (readImmediate(insn, 1)) return -1; - if (insn->spec->operands[index].type == TYPE_IMM3 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; - if (insn->spec->operands[index].type == TYPE_IMM5 && + if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && insn->immediates[insn->numImmediatesConsumed - 1] > 31) return -1; - if (insn->spec->operands[index].type == TYPE_XMM128 || - insn->spec->operands[index].type == TYPE_XMM256) + if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || + x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) sawRegImm = 1; break; case ENCODING_IW: @@ -1582,7 +1582,7 @@ static int readOperands(struct InternalInstruction* insn) { needVVVV = 0; /* Mark that we have found a VVVV operand. */ if (!hasVVVV) return -1; - if (fixupReg(insn, &insn->spec->operands[index])) + if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) return -1; break; case ENCODING_DUP: @@ -1644,6 +1644,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->instructionID == 0 || readOperands(insn)) return -1; + + insn->operands = &x86OperandSets[insn->spec->operands][0]; insn->length = insn->readerCursor - insn->startLocation; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index e2caf6a..797703f 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -19,17 +19,18 @@ #ifdef __cplusplus extern "C" { #endif - -#define INSTRUCTION_SPECIFIER_FIELDS + +#define INSTRUCTION_SPECIFIER_FIELDS \ + uint16_t operands; #define INSTRUCTION_IDS \ unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" - + #undef INSTRUCTION_SPECIFIER_FIELDS #undef INSTRUCTION_IDS - + /* * Accessor functions for various fields of an Intel instruction */ @@ -43,7 +44,7 @@ extern "C" { #define rFromREX(rex) (((rex) & 0x4) >> 2) #define xFromREX(rex) (((rex) & 0x2) >> 1) #define bFromREX(rex) ((rex) & 0x1) - + #define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) #define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) #define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) @@ -237,7 +238,7 @@ extern "C" { ENTRY(YMM13) \ ENTRY(YMM14) \ ENTRY(YMM15) - + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -245,7 +246,7 @@ extern "C" { ENTRY(DS) \ ENTRY(FS) \ ENTRY(GS) - + #define REGS_DEBUG \ ENTRY(DR0) \ ENTRY(DR1) \ @@ -266,12 +267,12 @@ extern "C" { ENTRY(CR6) \ ENTRY(CR7) \ ENTRY(CR8) - + #define ALL_EA_BASES \ EA_BASES_16BIT \ EA_BASES_32BIT \ EA_BASES_64BIT - + #define ALL_SIB_BASES \ REGS_32BIT \ REGS_64BIT @@ -290,7 +291,7 @@ extern "C" { ENTRY(RIP) /* - * EABase - All possible values of the base field for effective-address + * EABase - All possible values of the base field for effective-address * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We * distinguish between bases (EA_BASE_*) and registers that just happen to be * referred to when Mod == 0b11 (EA_REG_*). @@ -305,8 +306,8 @@ typedef enum { #undef ENTRY EA_max } EABase; - -/* + +/* * SIBIndex - All possible values of the SIB index field. * Borrows entries from ALL_EA_BASES with the special case that * sib is synonymous with NONE. @@ -321,7 +322,7 @@ typedef enum { #undef ENTRY SIB_INDEX_max } SIBIndex; - + /* * SIBBase - All possible values of the SIB base field. */ @@ -353,7 +354,7 @@ typedef enum { #undef ENTRY MODRM_REG_max } Reg; - + /* * SegmentOverride - All possible segment overrides. */ @@ -367,7 +368,7 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; - + /* * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field */ @@ -431,16 +432,16 @@ struct InternalInstruction { void* dlogArg; /* General instruction information */ - + /* The mode to disassemble for (64-bit, protected, real) */ DisassemblerMode mode; /* The start of the instruction, usable with the reader */ uint64_t startLocation; /* The length of the instruction, in bytes */ size_t length; - + /* Prefix state */ - + /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ @@ -456,7 +457,7 @@ struct InternalInstruction { uint64_t necessaryPrefixLocation; /* The segment override type */ SegmentOverride segmentOverride; - + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; @@ -467,9 +468,9 @@ struct InternalInstruction { needed to find relocation entries for adding symbolic operands */ uint8_t displacementOffset; uint8_t immediateOffset; - + /* opcode state */ - + /* The value of the two-byte escape prefix (usually 0x0f) */ uint8_t twoByteEscape; /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */ @@ -478,16 +479,16 @@ struct InternalInstruction { uint8_t opcode; /* The ModR/M byte of the instruction, if it is an opcode extension */ uint8_t modRMExtension; - + /* decode state */ - + /* The type of opcode, used for indexing into the array of decode tables */ OpcodeType opcodeType; /* The instruction ID, extracted from the decode table */ uint16_t instructionID; /* The specifier for the instruction, from the instruction info table */ const struct InstructionSpecifier *spec; - + /* state for additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ @@ -495,12 +496,12 @@ struct InternalInstruction { /* The VEX.vvvv field, which contains a third register operand for some AVX instructions */ Reg vvvv; - + /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ BOOL consumedModRM; uint8_t modRM; - + /* The SIB byte, used for more complex 32- or 64-bit memory operands */ BOOL consumedSIB; uint8_t sib; @@ -508,19 +509,19 @@ struct InternalInstruction { /* The displacement, used for memory operands */ BOOL consumedDisplacement; int32_t displacement; - + /* Immediates. There can be two in some cases */ uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - + /* A register or immediate operand encoded into the opcode */ BOOL consumedOpcodeModifier; uint8_t opcodeModifier; Reg opcodeRegister; - + /* Portions of the ModR/M byte */ - + /* These fields determine the allowable values for the ModR/M fields, which depend on operand and address widths */ EABase eaBaseBase; @@ -533,11 +534,13 @@ struct InternalInstruction { EADisplacement eaDisplacement; /* The reg field always encodes a register */ Reg reg; - + /* SIB state */ SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + + const struct OperandSpecifier *operands; }; /* decodeInstruction - Decode one instruction and store the decoding results in @@ -571,15 +574,15 @@ int decodeInstruction(struct InternalInstruction* insn, * @param line - The line number that printed the debug message. * @param s - The message to print. */ - + void x86DisassemblerDebug(const char *file, unsigned line, const char *s); const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); -#ifdef __cplusplus +#ifdef __cplusplus } #endif - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 13e1136..b0a0e1e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -119,7 +119,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") -#define ENUM_ENTRY(n, r, d) n, +#define ENUM_ENTRY(n, r, d) n, typedef enum { INSTRUCTION_CONTEXTS IC_max @@ -148,11 +148,11 @@ typedef enum { * If a ModR/M byte is not required, "required" is left unset, and the values * for each instructionID are identical. */ - + typedef uint16_t InstrUID; /* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the + * ModRMDecisionType - describes the type of ModR/M decision, allowing the * consumer to determine the number of entries in it. * * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded @@ -172,7 +172,7 @@ typedef uint16_t InstrUID; ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) -#define ENUM_ENTRY(n) n, +#define ENUM_ENTRY(n) n, typedef enum { MODRMTYPES MODRM_max @@ -180,13 +180,13 @@ typedef enum { #undef ENUM_ENTRY /* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which + * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which * instruction each possible value of the ModR/M byte corresponds to. Once * this information is known, we have narrowed down to a single instruction. */ struct ModRMDecision { uint8_t modrm_type; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_IDS }; @@ -210,7 +210,7 @@ struct ContextDecision { struct OpcodeDecision opcodeDecisions[IC_max]; }; -/* +/* * Physical encodings of instruction operands. */ @@ -244,14 +244,14 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \ "in type") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { ENCODINGS ENCODING_max } OperandEncoding; #undef ENUM_ENTRY -/* +/* * Semantic interpretations of instruction operands. */ @@ -332,14 +332,14 @@ struct ContextDecision { ENUM_ENTRY(TYPE_DUP4, "operand 4") \ ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") -#define ENUM_ENTRY(n, d) n, +#define ENUM_ENTRY(n, d) n, typedef enum { TYPES TYPE_max } OperandType; #undef ENUM_ENTRY -/* +/* * OperandSpecifier - The specification for how to extract and interpret one * operand. */ @@ -374,8 +374,7 @@ typedef enum { struct InstructionSpecifier { uint8_t modifierType; uint8_t modifierBase; - struct OperandSpecifier operands[X86_MAX_OPERANDS]; - + /* The macro below must be defined wherever this file is included. */ INSTRUCTION_SPECIFIER_FIELDS }; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 49c07f3..b0acd7d 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -91,9 +91,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; - // OpenBSD has buggy support for .quad in 32-bit mode, just split into two - // .words. - if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86) + // OpenBSD and Bitrig have buggy support for .quad in 32-bit mode, just split + // into two .words. + if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) && + T.getArch() == Triple::x86) Data64bitsDirective = 0; } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index bf05ccf..dce5b4d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -26,7 +26,7 @@ class FunctionPass; class JITCodeEmitter; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6c1a816..18e6b7c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -17,14 +17,14 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// X86 Subtarget state. +// X86 Subtarget state // def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true", "64-bit mode (x86_64)">; //===----------------------------------------------------------------------===// -// X86 Subtarget features. +// X86 Subtarget features //===----------------------------------------------------------------------===// def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true", @@ -97,7 +97,7 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", [FeatureAVX, FeatureSSE4A]>; def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", - [FeatureAVX, FeatureSSE4A]>; + [FeatureFMA4]>; def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; @@ -226,7 +226,7 @@ def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeatureAES, FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI]>; + FeaturePOPCNT, FeatureBMI, FeatureFMA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index a6ed9ba..35386cd 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -37,15 +37,15 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual const char *getPassName() const { return "X86 AT&T-Style Assembly Printer"; } - + const X86Subtarget &getSubtarget() const { return *Subtarget; } virtual void EmitStartOfAsmFile(Module &M); virtual void EmitEndOfAsmFile(Module &M); - + virtual void EmitInstruction(const MachineInstr *MI); - + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. @@ -71,7 +71,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void printPICLabel(const MachineInstr *MI, unsigned Op, raw_ostream &O); bool runOnMachineFunction(MachineFunction &F); - + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index e01ff41..6a6125b 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -17,4 +17,3 @@ using namespace llvm; X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } - diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0cec95a..471eb31 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -1,4 +1,4 @@ -//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// +//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -33,7 +33,7 @@ public: void addExternalFunction(MCSymbol* Symbol) { Externals.insert(Symbol); } - + typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator; externals_iterator externals_begin() const { return Externals.begin(); } externals_iterator externals_end() const { return Externals.end(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 585b7a5..e5952aa 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -57,7 +57,9 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo) { Subtarget = &TM.getSubtarget<X86Subtarget>(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -155,9 +157,9 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { // For now, require SSE/SSE2 for performing floating-point operations, // since x87 requires additional work. if (VT == MVT::f64 && !X86ScalarSSEf64) - return false; + return false; if (VT == MVT::f32 && !X86ScalarSSEf32) - return false; + return false; // Similarly, no f80 support yet. if (VT == MVT::f80) return false; @@ -1516,6 +1518,22 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { return DoSelectCall(I, 0); } +static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, + const ImmutableCallSite &CS) { + if (Subtarget.is64Bit()) + return 0; + if (Subtarget.isTargetWindows()) + return 0; + CallingConv::ID CC = CS.getCallingConv(); + if (CC == CallingConv::Fast || CC == CallingConv::GHC) + return 0; + if (!CS.paramHasAttr(1, Attribute::StructRet)) + return 0; + if (CS.paramHasAttr(1, Attribute::InReg)) + return 0; + return 4; +} + // Select either a call, or an llvm.memcpy/memmove/memset intrinsic bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { const CallInst *CI = cast<CallInst>(I); @@ -1862,12 +1880,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - unsigned NumBytesCallee = 0; - if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() && - !(CS.getCallingConv() == CallingConv::Fast || - CS.getCallingConv() == CallingConv::GHC) && - CS.paramHasAttr(1, Attribute::StructRet)) - NumBytesCallee = 4; + const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) .addImm(NumBytes).addImm(NumBytesCallee); @@ -2129,28 +2142,28 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { unsigned Opc = 0; const TargetRegisterClass *RC = NULL; switch (VT.SimpleTy) { - default: return false; - case MVT::f32: - if (X86ScalarSSEf32) { - Opc = X86::FsFLD0SS; - RC = &X86::FR32RegClass; - } else { - Opc = X86::LD_Fp032; - RC = &X86::RFP32RegClass; - } - break; - case MVT::f64: - if (X86ScalarSSEf64) { - Opc = X86::FsFLD0SD; - RC = &X86::FR64RegClass; - } else { - Opc = X86::LD_Fp064; - RC = &X86::RFP64RegClass; - } - break; - case MVT::f80: - // No f80 support yet. - return false; + default: return false; + case MVT::f32: + if (X86ScalarSSEf32) { + Opc = X86::FsFLD0SS; + RC = &X86::FR32RegClass; + } else { + Opc = X86::LD_Fp032; + RC = &X86::RFP32RegClass; + } + break; + case MVT::f64: + if (X86ScalarSSEf64) { + Opc = X86::FsFLD0SD; + RC = &X86::FR64RegClass; + } else { + Opc = X86::LD_Fp064; + RC = &X86::RFP64RegClass; + } + break; + case MVT::f80: + // No f80 support yet. + return false; } unsigned ResultReg = createResultReg(RC); @@ -2169,7 +2182,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, if (!X86SelectAddress(LI->getOperand(0), AM)) return false; - X86InstrInfo &XII = (X86InstrInfo&)TII; + const X86InstrInfo &XII = (const X86InstrInfo&)TII; unsigned Size = TD.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); @@ -2188,7 +2201,8 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, namespace llvm { - FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { - return new X86FastISel(funcInfo); + FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new X86FastISel(funcInfo, libInfo); } } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 711ee41..955c75a 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -971,7 +971,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { // Change from the pseudo instruction to the concrete instruction. MI->RemoveOperand(0); // Remove the explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // Result gets pushed on the stack. pushReg(DestReg); } @@ -1015,7 +1015,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { } else { moveToTop(Reg, I); // Move to the top of the stack... } - + // Convert from the pseudo instruction to the concrete instruction. MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); @@ -1297,7 +1297,7 @@ void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { MI->RemoveOperand(1); MI->getOperand(0).setReg(getSTReg(Op1)); MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); - + // If we kill the second operand, make sure to pop it from the stack. if (Op0 != Op1 && KillsOp1) { // Get this value off of the register stack. @@ -1714,38 +1714,38 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Assert that the top of stack contains the right FP register. assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && "Top of stack not the right register for RET!"); - + // Ok, everything is good, mark the value as not being on the stack // anymore so that our assertion about the stack being empty at end of // block doesn't fire. StackTop = 0; return; } - + // Otherwise, we are returning two values: // 2) If returning the same value for both, we only have one thing in the FP // stack. Consider: RET FP1, FP1 if (StackTop == 1) { assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& "Stack misconfiguration for RET!"); - + // Duplicate the TOS so that we return it twice. Just pick some other FPx // register to hold it. unsigned NewReg = getScratchReg(); duplicateToTop(FirstFPRegOp, NewReg, MI); FirstFPRegOp = NewReg; } - + /// Okay we know we have two different FPx operands now: assert(StackTop == 2 && "Must have two values live!"); - + /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently /// in ST(1). In this case, emit an fxch. if (getStackEntry(0) == SecondFPRegOp) { assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); moveToTop(FirstFPRegOp, MI); } - + /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in /// ST(1). Just remove both from our understanding of the stack and return. assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 5186482..27195b4 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -60,7 +60,7 @@ namespace { int Base_FrameIndex; unsigned Scale; - SDValue IndexReg; + SDValue IndexReg; int32_t Disp; SDValue Segment; const GlobalValue *GV; @@ -80,11 +80,11 @@ namespace { bool hasSymbolicDisplacement() const { return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; } - + bool hasBaseOrIndexReg() const { return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } - + /// isRIPRelative - Return true if this addressing mode is already RIP /// relative. bool isRIPRelative() const { @@ -94,7 +94,7 @@ namespace { return RegNode->getReg() == X86::RIP; return false; } - + void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -104,7 +104,7 @@ namespace { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; if (Base_Reg.getNode() != 0) - Base_Reg.getNode()->dump(); + Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' @@ -113,7 +113,7 @@ namespace { if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); else - dbgs() << "nul"; + dbgs() << "nul"; dbgs() << " Disp " << Disp << '\n' << "GV "; if (GV) @@ -213,21 +213,21 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); - + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); - + void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); - inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, + inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? @@ -426,7 +426,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { void X86DAGToDAGISel::PreprocessISelDAG() { // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); - + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. @@ -462,7 +462,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { ++NumLoadMoved; continue; } - + // Lower fpround and fpextend nodes that target the FP stack to be store and // load to the stack. This is a gross hack. We would like to simply mark // these as being illegal, but when we do that, legalize produces these when @@ -473,7 +473,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; - + EVT SrcVT = N->getOperand(0).getValueType(); EVT DstVT = N->getValueType(0); @@ -496,7 +496,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (N->getConstantOperandVal(1)) continue; } - + // Here we could have an FP stack truncation or an FPStack <-> SSE convert. // FPStack has extload and truncstore. SSE can fold direct loads into other // operations. Based on this, decide what we want to do. @@ -505,10 +505,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() { MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. else MemVT = SrcIsSSE ? SrcVT : DstVT; - + SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); DebugLoc dl = N->getDebugLoc(); - + // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), @@ -524,12 +524,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { // To avoid invalidating 'I', back it up to the convert node. --I; CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); - + // Now that we did that, the node is dead. Increment the iterator to the // next node to process, then delete N. ++I; CurDAG->DeleteNode(N); - } + } } @@ -584,7 +584,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); - + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -593,7 +593,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && - Subtarget->isTargetELF()) + Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); @@ -602,7 +602,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); return false; } - + return true; } @@ -992,7 +992,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::SHL: if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; - + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { unsigned Val = CN->getZExtValue(); @@ -1167,7 +1167,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) return false; AM = Backup; - + // Try again after commuting the operands. if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&& !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) @@ -1203,7 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; } break; - + case ISD::AND: { // Perform some heroic transforms on an and of a constant-count shift // with a constant to enable use of the scaled offset field. @@ -1275,7 +1275,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; - + if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand // that are not a MemSDNode, and thus don't have proper addrspace info. @@ -1290,7 +1290,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (AddrSpace == 257) AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - + if (MatchAddress(N, AM)) return false; @@ -1336,7 +1336,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, // elements. This is a vector shuffle from the zero vector. if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && // Check to see if the top elements are all zeros (or bitcast of zeros). - N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && + N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && @@ -1411,7 +1411,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, // If it isn't worth using an LEA, reject it. if (Complexity <= 2) return false; - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1422,7 +1422,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1435,7 +1435,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } else { AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - + getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1449,7 +1449,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsProfitableToFold(N, P, P) || !IsLegalToFold(N, P, P, OptLevel)) return false; - + return SelectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } @@ -1700,7 +1700,7 @@ static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = { SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { if (Node->hasAnyUseOfValue(0)) return 0; - + // Optimize common patterns for __sync_or_and_fetch and similar arith // operations where the result is not used. This allows us to use the "lock" // version of the arithmetic instruction. @@ -1727,14 +1727,14 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { default: return 0; } - + bool isCN = false; ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) { isCN = true; Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT); } - + unsigned Opc = 0; switch (NVT.getSimpleVT().SimpleTy) { default: return 0; @@ -1772,7 +1772,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) { } break; } - + assert(Opc != 0 && "Invalid arith lock transform!"); DebugLoc dl = Node->getDebugLoc(); @@ -1852,7 +1852,7 @@ static bool HasNoSignedComparisonUses(SDNode *N) { /// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode /// is suitable for doing the {load; increment or decrement; store} to modify /// transformation. -static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -1876,15 +1876,15 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, // Return LoadNode by reference. LoadNode = cast<LoadSDNode>(Load); // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) - EVT LdVT = LoadNode->getMemoryVT(); - if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && + EVT LdVT = LoadNode->getMemoryVT(); + if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 && LdVT != MVT::i8) return false; // Is store the only read of the loaded value? if (!Load.hasOneUse()) return false; - + // Is the address of the store the same as the load? if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || LoadNode->getOffset() != StoreNode->getOffset()) @@ -1990,7 +1990,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opc, MOpc; unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); - + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { @@ -2062,7 +2062,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::ATOMSWAP64_DAG: { unsigned Opc; switch (Opcode) { - default: llvm_unreachable("Impossible intrinsic"); + default: llvm_unreachable("Impossible opcode"); case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; @@ -2119,7 +2119,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) break; - unsigned ShlOp, Op = 0; + unsigned ShlOp, Op; EVT CstVT = NVT; // Check the minimum bitwidth for the new constant. @@ -2142,6 +2142,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL32ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = X86::AND32ri8; break; case ISD::OR: Op = X86::OR32ri8; break; case ISD::XOR: Op = X86::XOR32ri8; break; @@ -2152,6 +2153,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ShlOp = X86::SHL64ri; switch (Opcode) { + default: llvm_unreachable("Impossible opcode"); case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; @@ -2168,7 +2170,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); - + unsigned LoReg; switch (NVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unsupported VT!"); @@ -2177,20 +2179,20 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; } - + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, N0, SDValue()).getValue(1); - + SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); SDValue Ops[] = {N1, InFlag}; SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2); - + ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); return NULL; } - + case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue N0 = Node->getOperand(0); @@ -2287,7 +2289,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - + return NULL; } @@ -2438,7 +2440,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return NULL; } - case X86ISD::CMP: { + case X86ISD::CMP: + case X86ISD::SUB: { + // Sometimes a SUB is used to perform comparison. + if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) + // This node is not a CMP. + break; SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); @@ -2555,7 +2562,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // a simple increment or decrement through memory of that value, if the // uses of the modified value and its address are suitable. // The DEC64m tablegen pattern is currently not able to match the case where - // the EFLAGS on the original DEC are used. (This also applies to + // the EFLAGS on the original DEC are used. (This also applies to // {INC,DEC}X{64,32,16,8}.) // We'll need to improve tablegen to allow flags to be transferred from a // node in the pattern to the result node. probably with a new keyword @@ -2587,7 +2594,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { MemOp[0] = StoreNode->getMemOperand(); MemOp[1] = LoadNode->getMemOperand(); const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; - EVT LdVT = LoadNode->getMemoryVT(); + EVT LdVT = LoadNode->getMemoryVT(); unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); MachineSDNode *Result = CurDAG->getMachineNode(newOpc, Node->getDebugLoc(), @@ -2600,6 +2607,85 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return Result; } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPESTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + SDValue N3 = Node->getOperand(3); + SDValue N4 = Node->getOperand(4); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N4); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, + X86::EAX, N1, SDValue()).getValue(1); + InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, + N3, InFlag).getValue(1); + + SDValue Ops[] = { N0, N2, getI8Imm(Imm), InFlag }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : + X86::PCMPESTRIrr; + InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } + + // FIXME: Custom handling because TableGen doesn't support multiple implicit + // defs in an instruction pattern + case X86ISD::PCMPISTRI: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + + // Make sure last argument is a constant + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N2); + if (!Cst) + break; + + uint64_t Imm = Cst->getZExtValue(); + + SDValue Ops[] = { N0, N1, getI8Imm(Imm) }; + unsigned Opc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : + X86::PCMPISTRIrr; + SDValue InFlag = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Ops, + array_lengthof(Ops)), 0); + + if (!SDValue(Node, 0).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::ECX, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 0), Result); + } + if (!SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::EFLAGS, NVT, InFlag); + InFlag = Result.getValue(2); + ReplaceUses(SDValue(Node, 1), Result); + } + + return NULL; + } } SDNode *ResNode = SelectCode(Node); @@ -2627,7 +2713,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return true; break; } - + OutOps.push_back(Op0); OutOps.push_back(Op1); OutOps.push_back(Op2); @@ -2636,7 +2722,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a +/// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b88f2fa..7954170 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -66,7 +66,7 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, static SDValue Extract128BitVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, DebugLoc dl) { EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 256 && "Unexpected vector size!"); + assert(VT.is256BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); unsigned Factor = VT.getSizeInBits()/128; EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, @@ -105,7 +105,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, return Result; EVT VT = Vec.getValueType(); - assert(VT.getSizeInBits() == 128 && "Unexpected vector size!"); + assert(VT.is128BitVector() && "Unexpected vector size!"); EVT ElVT = VT.getVectorElementType(); EVT ResultVT = Result.getValueType(); @@ -174,7 +174,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // For 64-bit since we have so many registers use the ILP scheduler, for // 32-bit code use the register pressure specific scheduling. // For Atom, always use ILP scheduling. - if (Subtarget->isAtom()) + if (Subtarget->isAtom()) setSchedulingPreference(Sched::ILP); else if (Subtarget->is64Bit()) setSchedulingPreference(Sched::ILP); @@ -731,6 +731,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FMA, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); @@ -828,7 +829,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) { @@ -869,27 +869,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - // Custom lower build_vector, vector_shuffle, and extract_vector_elt. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - EVT VT = (MVT::SimpleValueType)i; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; // Do not attempt to custom lower non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, - VT.getSimpleVT().SimpleTy, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, - VT.getSimpleVT().SimpleTy, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); } setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); @@ -906,23 +897,22 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-128-bit vectors if (!VT.is128BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v2i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v2i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v2i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v2i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -1009,9 +999,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } } - if (Subtarget->hasSSE42()) - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); @@ -1042,13 +1029,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i64, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v16i16, Custom); setOperationAction(ISD::SRL, MVT::v32i8, Custom); @@ -1072,6 +1052,15 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + if (Subtarget->hasFMA()) { + setOperationAction(ISD::FMA, MVT::v8f32, Custom); + setOperationAction(ISD::FMA, MVT::v4f64, Custom); + setOperationAction(ISD::FMA, MVT::v4f32, Custom); + setOperationAction(ISD::FMA, MVT::v2f64, Custom); + setOperationAction(ISD::FMA, MVT::f32, Custom); + setOperationAction(ISD::FMA, MVT::f64, Custom); + } + if (Subtarget->hasAVX2()) { setOperationAction(ISD::ADD, MVT::v4i64, Legal); setOperationAction(ISD::ADD, MVT::v8i32, Legal); @@ -1125,45 +1114,44 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. if (VT.is128BitVector()) - setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::BUILD_VECTOR, SVT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom); + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); } // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { - MVT::SimpleValueType SVT = (MVT::SimpleValueType)i; - EVT VT = SVT; + MVT VT = (MVT::SimpleValueType)i; // Do not attempt to promote non-256-bit vectors if (!VT.is256BitVector()) continue; - setOperationAction(ISD::AND, SVT, Promote); - AddPromotedToType (ISD::AND, SVT, MVT::v4i64); - setOperationAction(ISD::OR, SVT, Promote); - AddPromotedToType (ISD::OR, SVT, MVT::v4i64); - setOperationAction(ISD::XOR, SVT, Promote); - AddPromotedToType (ISD::XOR, SVT, MVT::v4i64); - setOperationAction(ISD::LOAD, SVT, Promote); - AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64); - setOperationAction(ISD::SELECT, SVT, Promote); - AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64); + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); } } @@ -1221,6 +1209,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); @@ -1718,21 +1707,37 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// CallIsStructReturn - Determines whether a call uses struct return /// semantics. -static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { +enum StructReturnType { + NotStructReturn, + RegStructReturn, + StackStructReturn +}; +static StructReturnType +callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) { if (Outs.empty()) - return false; + return NotStructReturn; - return Outs[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Outs[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// ArgsAreStructReturn - Determines whether a function uses struct /// return semantics. -static bool -ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { +static StructReturnType +argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { if (Ins.empty()) - return false; + return NotStructReturn; - return Ins[0].Flags.isSRet(); + const ISD::ArgFlagsTy &Flags = Ins[0].Flags; + if (!Flags.isSRet()) + return NotStructReturn; + if (Flags.isInReg()) + return RegStructReturn; + return StackStructReturn; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified @@ -1876,9 +1881,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 256) + else if (RegVT.is256BitVector()) RC = &X86::VR256RegClass; - else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) + else if (RegVT.is128BitVector()) RC = &X86::VR128RegClass; else if (RegVT == MVT::x86mmx) RC = &X86::VR64RegClass; @@ -2073,7 +2078,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - ArgsAreStructReturn(Ins)) + argsAreStructReturn(Ins) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2163,7 +2168,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); bool IsWindows = Subtarget->isTargetWindows(); - bool IsStructRet = CallIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs); bool IsSibcall = false; if (MF.getTarget().Options.DisableTailCalls) @@ -2172,8 +2177,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + isVarArg, SR != NotStructReturn, + MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -2255,7 +2261,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); break; case CCValAssign::AExt: - if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { + if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); @@ -2549,7 +2555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getTargetMachine().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows && - IsStructRet) + SR == StackStructReturn) // If this is a call to a struct-return function, the callee // pops the hidden struct pointer, so we have to push it back. // This is common for Darwin/X86, Linux & Mingw32 targets. @@ -2870,8 +2876,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } FastISel * -X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { - return X86::createFastISel(funcInfo); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return X86::createFastISel(funcInfo, libInfo); } @@ -3397,11 +3404,11 @@ static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX, /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVHLPS. static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3416,11 +3423,11 @@ static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) { /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, /// <2, 3, 2, 3> static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { - unsigned NumElems = VT.getVectorNumElements(); - - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; + unsigned NumElems = VT.getVectorNumElements(); + if (NumElems != 4) return false; @@ -3433,7 +3440,7 @@ static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) { /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -3455,10 +3462,12 @@ static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) { /// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLHPS. static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) { + if (!VT.is128BitVector()) + return false; + unsigned NumElems = VT.getVectorNumElements(); - if ((NumElems != 2 && NumElems != 4) - || VT.getSizeInBits() > 128) + if (NumElems != 2 && NumElems != 4) return false; for (unsigned i = 0, e = NumElems/2; i != e; ++i) @@ -3675,7 +3684,7 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) { static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { if (VT.getVectorElementType().getSizeInBits() < 32) return false; - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; unsigned NumElts = VT.getVectorNumElements(); @@ -3697,7 +3706,7 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) { /// The first half comes from the second half of V1 and the second half from the /// the second half of V2. static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - if (!HasAVX || VT.getSizeInBits() != 256) + if (!HasAVX || !VT.is256BitVector()) return false; // The shuffle result is divided into half A and half B. In total the two @@ -3789,9 +3798,10 @@ static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// element of vector 2 and the other elements to come from vector 1 in order. static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT, bool V2IsSplat = false, bool V2IsUndef = false) { - unsigned NumOps = VT.getVectorNumElements(); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return false; + + unsigned NumOps = VT.getVectorNumElements(); if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16) return false; @@ -3857,9 +3867,11 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT, /// specifies a shuffle of elements that is suitable for input to 256-bit /// version of MOVDDUP. static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { - unsigned NumElts = VT.getVectorNumElements(); + if (!HasAVX || !VT.is256BitVector()) + return false; - if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4) + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts != 4) return false; for (unsigned i = 0; i != NumElts/2; ++i) @@ -3875,7 +3887,7 @@ static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) { /// specifies a shuffle of elements that is suitable for input to 128-bit /// version of MOVDDUP. static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; unsigned e = VT.getVectorNumElements() / 2; @@ -4120,7 +4132,7 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp, /// V1 (and in order), and the upper half elements should come from the upper /// half of V2 (and in order). static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (VT.getVectorNumElements() != 4) return false; @@ -4177,7 +4189,7 @@ static bool WillBeConstantPoolLoad(SDNode *N) { /// MOVLP, it must be either a vector load or a scalar load to vector. static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, ArrayRef<int> Mask, EVT VT) { - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return false; if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1)) @@ -4719,7 +4731,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { // Although the logic below support any bitwidth size, there are no // shift instructions which handle more than 128-bit vectors. - if (SVOp->getValueType(0).getSizeInBits() > 128) + if (!SVOp->getValueType(0).is128BitVector()) return false; if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) || @@ -4814,7 +4826,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc dl) { - assert(VT.getSizeInBits() == 128 && "Unknown type for VShift"); + assert(VT.is128BitVector() && "Unknown type for VShift"); EVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); @@ -5047,7 +5059,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { } } - bool Is256 = VT.getSizeInBits() == 256; + bool Is256 = VT.is256BitVector(); // Handle the broadcasting a single constant scalar from the constant pool // into a vector. On Sandybridge it is still better to load a constant vector @@ -5102,6 +5114,86 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { return SDValue(); } +// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64 +// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the +// constraint of matching input/output vector elements. +SDValue +X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + EVT VT = Op.getValueType(); + unsigned NumElts = Op.getNumOperands(); + + // Check supported types and sub-targets. + // + // Only v2f32 -> v2f64 needs special handling. + if (VT != MVT::v2f64 || !Subtarget->hasSSE2()) + return SDValue(); + + SDValue VecIn; + EVT VecInVT; + SmallVector<int, 8> Mask; + EVT SrcVT = MVT::Other; + + // Check the patterns could be translated into X86vfpext. + for (unsigned i = 0; i < NumElts; ++i) { + SDValue In = N->getOperand(i); + unsigned Opcode = In.getOpcode(); + + // Skip if the element is undefined. + if (Opcode == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Quit if one of the elements is not defined from 'fpext'. + if (Opcode != ISD::FP_EXTEND) + return SDValue(); + + // Check how the source of 'fpext' is defined. + SDValue L2In = In.getOperand(0); + EVT L2InVT = L2In.getValueType(); + + // Check the original type + if (SrcVT == MVT::Other) + SrcVT = L2InVT; + else if (SrcVT != L2InVT) // Quit if non-homogenous typed. + return SDValue(); + + // Check whether the value being 'fpext'ed is extracted from the same + // source. + Opcode = L2In.getOpcode(); + + // Quit if it's not extracted with a constant index. + if (Opcode != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(L2In.getOperand(1))) + return SDValue(); + + SDValue ExtractedFromVec = L2In.getOperand(0); + + if (VecIn.getNode() == 0) { + VecIn = ExtractedFromVec; + VecInVT = ExtractedFromVec.getValueType(); + } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec. + return SDValue(); + + Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue()); + } + + // Quit if all operands of BUILD_VECTOR are undefined. + if (!VecIn.getNode()) + return SDValue(); + + // Fill the remaining mask as undef. + for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i) + Mask.push_back(-1); + + return DAG.getNode(X86ISD::VFPEXT, DL, VT, + DAG.getVectorShuffle(VecInVT, DL, + VecIn, DAG.getUNDEF(VecInVT), + &Mask[0])); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -5134,6 +5226,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (Broadcast.getNode()) return Broadcast; + SDValue FpExt = LowerVectorFpExtend(Op, DAG); + if (FpExt.getNode()) + return FpExt; + unsigned EVTBits = ExtVT.getSizeInBits(); unsigned NumZero = 0; @@ -5209,12 +5305,12 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 || (ExtVT == MVT::i64 && Subtarget->is64Bit())) { - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec, Item, DAG.getIntPtrConstant(0)); } - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); @@ -5223,11 +5319,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); } else { - assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!"); + assert(VT.is128BitVector() && "Expected an SSE value type!"); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5287,7 +5383,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and use // shuffles to put them in place. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { SmallVector<SDValue, 32> V; for (unsigned i = 0; i != NumElems; ++i) V.push_back(Op.getOperand(i)); @@ -5368,7 +5464,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]); } - if (Values.size() > 1 && VT.getSizeInBits() == 128) { + if (Values.size() > 1 && VT.is128BitVector()) { // Check for a build vector of consecutive loads. for (unsigned i = 0; i < NumElems; ++i) V[i] = Op.getOperand(i); @@ -5429,39 +5525,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerMMXCONCAT_VECTORS - We support concatenate two MMX registers and place -// them in a MMX register. This is better than doing a stack convert. -static SDValue LowerMMXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { - DebugLoc dl = Op.getDebugLoc(); - EVT ResVT = Op.getValueType(); - - assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 || - ResVT == MVT::v8i16 || ResVT == MVT::v16i8); - int Mask[2]; - SDValue InVec = DAG.getNode(ISD::BITCAST,dl, MVT::v1i64, Op.getOperand(0)); - SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - InVec = Op.getOperand(1); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { - unsigned NumElts = ResVT.getVectorNumElements(); - VecOp = DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp, - InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1)); - } else { - InVec = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, InVec); - SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec); - Mask[0] = 0; Mask[1] = 2; - VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask); - } - return DAG.getNode(ISD::BITCAST, dl, ResVT, VecOp); -} - // LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT ResVT = Op.getValueType(); - assert(ResVT.getSizeInBits() == 256 && "Value type must be 256-bit wide"); + assert(ResVT.is256BitVector() && "Value type must be 256-bit wide"); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -5472,16 +5542,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { - EVT ResVT = Op.getValueType(); - assert(Op.getNumOperands() == 2); - assert((ResVT.getSizeInBits() == 128 || ResVT.getSizeInBits() == 256) && - "Unsupported CONCAT_VECTORS for value type"); - - // We support concatenate two MMX registers and place them in a MMX register. - // This is better than doing a stack convert. - if (ResVT.is128BitVector()) - return LowerMMXCONCAT_VECTORS(Op, DAG); // 256-bit AVX can use the vinsertf128 instruction to create 256-bit vectors // from two other 128-bit ones. @@ -6131,7 +6192,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { DebugLoc dl = SVOp->getDebugLoc(); EVT VT = SVOp->getValueType(0); - assert(VT.getSizeInBits() == 128 && "Unsupported vector size"); + assert(VT.is128BitVector() && "Unsupported vector size"); std::pair<int, int> Locs[4]; int Mask1[] = { -1, -1, -1, -1 }; @@ -6759,7 +6820,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle all 128-bit wide vectors with 4 elements, and match them with // several different shuffle types. - if (NumElems == 4 && VT.getSizeInBits() == 128) + if (NumElems == 4 && VT.is128BitVector()) return LowerVECTOR_SHUFFLE_128v4(SVOp, DAG); // Handle general 256-bit shuffles @@ -6775,7 +6836,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getSizeInBits() != 128) + if (!Op.getOperand(0).getValueType().is128BitVector()) return SDValue(); if (VT.getSizeInBits() == 8) { @@ -6845,7 +6906,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // If this is a 256-bit vector result, first extract the 128-bit vector and // then extract the element from the 128-bit vector. - if (VecVT.getSizeInBits() == 256) { + if (VecVT.is256BitVector()) { DebugLoc dl = Op.getNode()->getDebugLoc(); unsigned NumElems = VecVT.getVectorNumElements(); SDValue Idx = Op.getOperand(1); @@ -6860,7 +6921,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, DAG.getConstant(IdxVal, MVT::i32)); } - assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length"); + assert(VecVT.is128BitVector() && "Unexpected vector length"); if (Subtarget->hasSSE41()) { SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG); @@ -6936,7 +6997,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (VT.getSizeInBits() == 256) + if (!VT.is128BitVector()) return SDValue(); if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && @@ -6992,7 +7053,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first extract the 128-bit vector, // insert the element into the extracted half and then place it back. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { if (!isa<ConstantSDNode>(N2)) return SDValue(); @@ -7036,7 +7097,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If this is a 256-bit vector result, first insert into a 128-bit // vector and then insert into the 256-bit vector. - if (OpVT.getSizeInBits() > 128) { + if (!OpVT.is128BitVector()) { // Insert into a 128-bit vector. EVT VT128 = EVT::getVectorVT(*Context, OpVT.getVectorElementType(), @@ -7053,7 +7114,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); - assert(OpVT.getSizeInBits() == 128 && "Expected an SSE type!"); + assert(OpVT.is128BitVector() && "Expected an SSE type!"); return DAG.getNode(ISD::BITCAST, dl, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,AnyExt)); } @@ -7068,8 +7129,8 @@ X86TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getNode()->getOperand(0); SDValue Idx = Op.getNode()->getOperand(1); - if (Op.getNode()->getValueType(0).getSizeInBits() == 128 && - Vec.getNode()->getValueType(0).getSizeInBits() == 256 && + if (Op.getNode()->getValueType(0).is128BitVector() && + Vec.getNode()->getValueType(0).is256BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Extract128BitVector(Vec, IdxVal, DAG, dl); @@ -7089,8 +7150,8 @@ X86TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SDValue SubVec = Op.getNode()->getOperand(1); SDValue Idx = Op.getNode()->getOperand(2); - if (Op.getNode()->getValueType(0).getSizeInBits() == 256 && - SubVec.getNode()->getValueType(0).getSizeInBits() == 128 && + if (Op.getNode()->getValueType(0).is256BitVector() && + SubVec.getNode()->getValueType(0).is128BitVector() && isa<ConstantSDNode>(Idx)) { unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); return Insert128BitVector(Vec, SubVec, IdxVal, DAG, dl); @@ -7735,9 +7796,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U } subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } #ifdef __SSE3__ - haddpd %xmm0, %xmm0 + haddpd %xmm0, %xmm0 #else - pshufd $0x4e, %xmm0, %xmm1 + pshufd $0x4e, %xmm0, %xmm1 addpd %xmm1, %xmm0 #endif */ @@ -8064,7 +8125,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, EltVT = VT.getVectorElementType(); Constant *C; if (EltVT == MVT::f64) { - C = ConstantVector::getSplat(2, + C = ConstantVector::getSplat(2, ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))))); } else { C = ConstantVector::getSplat(4, @@ -8098,7 +8159,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo::getConstantPool(), false, false, false, 16); if (VT.isVector()) { - MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64; + MVT XORVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, XORVT, DAG.getNode(ISD::BITCAST, dl, XORVT, @@ -8226,7 +8287,33 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned Opcode = 0; unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { + + // Truncate operations may prevent the merge of the SETCC instruction + // and the arithmetic intruction before it. Attempt to truncate the operands + // of the arithmetic instruction and use a reduced bit-width instruction. + bool NeedTruncation = false; + SDValue ArithOp = Op; + if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) { + SDValue Arith = Op->getOperand(0); + // Both the trunc and the arithmetic op need to have one user each. + if (Arith->hasOneUse()) + switch (Arith.getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + NeedTruncation = true; + ArithOp = Arith; + } + } + } + + // NOTICE: In the code below we use ArithOp to hold the arithmetic operation + // which may be the result of a CAST. We use the variable 'Op', which is the + // non-casted variable when we check for possible users. + switch (ArithOp.getOpcode()) { case ISD::ADD: // Due to an isel shortcoming, be conservative if this add is likely to be // selected as part of a load-modify-store instruction. When the root node @@ -8246,7 +8333,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { + dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. if (C->getAPIntValue() == 1) { Opcode = X86ISD::INC; @@ -8282,7 +8369,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) { NonFlagUse = true; break; } @@ -8303,15 +8390,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, goto default_case; // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { + switch (ArithOp.getOpcode()) { default: llvm_unreachable("unexpected operator!"); - case ISD::SUB: - // If the only use of SUB is EFLAGS, use CMP instead. - if (Op.hasOneUse()) - Opcode = X86ISD::CMP; - else - Opcode = X86ISD::SUB; - break; + case ISD::SUB: Opcode = X86ISD::SUB; break; case ISD::OR: Opcode = X86ISD::OR; break; case ISD::XOR: Opcode = X86ISD::XOR; break; case ISD::AND: Opcode = X86ISD::AND; break; @@ -8332,19 +8413,40 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // If we found that truncation is beneficial, perform the truncation and + // update 'Op'. + if (NeedTruncation) { + EVT VT = Op.getValueType(); + SDValue WideVal = Op->getOperand(0); + EVT WideVT = WideVal.getValueType(); + unsigned ConvertedOp = 0; + // Use a target machine opcode to prevent further DAGCombine + // optimizations that may separate the arithmetic operations + // from the setcc node. + switch (WideVal.getOpcode()) { + default: break; + case ISD::ADD: ConvertedOp = X86ISD::ADD; break; + case ISD::SUB: ConvertedOp = X86ISD::SUB; break; + case ISD::AND: ConvertedOp = X86ISD::AND; break; + case ISD::OR: ConvertedOp = X86ISD::OR; break; + case ISD::XOR: ConvertedOp = X86ISD::XOR; break; + } + + if (ConvertedOp) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { + SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); + SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); + Op = DAG.getNode(ConvertedOp, dl, VT, V0, V1); + } + } + } + if (Opcode == 0) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); - if (Opcode == X86ISD::CMP) { - SDValue New = DAG.getNode(Opcode, dl, MVT::i32, Op.getOperand(0), - Op.getOperand(1)); - // We can't replace usage of SUB with CMP. - // The SUB node will be removed later because there is no use of it. - return SDValue(New.getNode(), 0); - } - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); SmallVector<SDValue, 4> Ops; for (unsigned i = 0; i != NumOperands; ++i) @@ -8364,6 +8466,14 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return EmitTest(Op0, X86CC, DAG); DebugLoc dl = Op0.getDebugLoc(); + if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || + Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { + // Use SUB instead of CMP to enable CSE between SUB and CMP. + SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); + SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, + Op0, Op1); + return SDValue(Sub.getNode(), 1); + } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } @@ -8522,7 +8632,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntVSETCC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && Op.getOpcode() == ISD::SETCC && + assert(VT.is256BitVector() && Op.getOpcode() == ISD::SETCC && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -8559,10 +8669,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (isFP) { - unsigned SSECC = 8; +#ifndef NDEBUG EVT EltVT = Op0.getValueType().getVectorElementType(); - assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT; + assert(EltVT == MVT::f32 || EltVT == MVT::f64); +#endif + unsigned SSECC; bool Swap = false; // SSE Condition code mapping: @@ -8575,7 +8687,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // 6 - NLE // 7 - ORD switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETOEQ: case ISD::SETEQ: SSECC = 0; break; case ISD::SETOGT: @@ -8589,34 +8701,33 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETUO: SSECC = 3; break; case ISD::SETUNE: case ISD::SETNE: SSECC = 4; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; // Fallthrough case ISD::SETUGE: SSECC = 5; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; // Fallthrough case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; + case ISD::SETUEQ: + case ISD::SETONE: SSECC = 8; break; } if (Swap) std::swap(Op0, Op1); // In the two special cases we can't handle, emit two comparisons. if (SSECC == 8) { + unsigned CC0, CC1; + unsigned CombineOpc; if (SetCCOpcode == ISD::SETUEQ) { - SDValue UNORD, EQ; - UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(3, MVT::i8)); - EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(0, MVT::i8)); - return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } - if (SetCCOpcode == ISD::SETONE) { - SDValue ORD, NEQ; - ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(7, MVT::i8)); - NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, - DAG.getConstant(4, MVT::i8)); - return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ); + CC0 = 3; CC1 = 0; CombineOpc = ISD::OR; + } else { + assert(SetCCOpcode == ISD::SETONE); + CC0 = 7; CC1 = 4; CombineOpc = ISD::AND; } - llvm_unreachable("Illegal FP comparison"); + + SDValue Cmp0 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC0, MVT::i8)); + SDValue Cmp1 = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, + DAG.getConstant(CC1, MVT::i8)); + return DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); } // Handle all other FP comparisons here. return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1, @@ -8624,17 +8735,17 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare into smaller ones. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. - unsigned Opc = 0; + unsigned Opc; bool Swap = false, Invert = false, FlipSigns = false; switch (SetCCOpcode) { - default: break; + default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break; case ISD::SETLT: Swap = true; @@ -8651,10 +8762,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { // Check that the operation in question is available (most are plain SSE2, // but PCMPGTQ and PCMPEQQ have different requirements). - if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42()) - return SDValue(); - if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41()) - return SDValue(); + if (VT == MVT::v2i64) { + if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42()) + return SDValue(); + if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) + return SDValue(); + } // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. @@ -8714,6 +8827,16 @@ static bool isAllOnes(SDValue V) { return C && C->isAllOnesValue(); } +static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { + if (V.getOpcode() != ISD::TRUNCATE) + return false; + + SDValue VOp0 = V.getOperand(0); + unsigned InBits = VOp0.getValueSizeInBits(); + unsigned Bits = V.getValueSizeInBits(); + return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); +} + SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); @@ -8728,46 +8851,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = NewCond; } - // Handle the following cases related to max and min: - // (a > b) ? (a-b) : 0 - // (a >= b) ? (a-b) : 0 - // (b < a) ? (a-b) : 0 - // (b <= a) ? (a-b) : 0 - // Comparison is removed to use EFLAGS from SUB. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2)) - if (Cond.getOpcode() == X86ISD::SETCC && - Cond.getOperand(1).getOpcode() == X86ISD::CMP && - (Op1.getOpcode() == ISD::SUB || Op1.getOpcode() == X86ISD::SUB) && - C->getAPIntValue() == 0) { - SDValue Cmp = Cond.getOperand(1); - unsigned CC = cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue(); - if ((DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(0)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(1)) && - (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_A || CC == X86::COND_AE)) || - (DAG.isEqualTo(Op1.getOperand(0), Cmp.getOperand(1)) && - DAG.isEqualTo(Op1.getOperand(1), Cmp.getOperand(0)) && - (CC == X86::COND_L || CC == X86::COND_LE || - CC == X86::COND_B || CC == X86::COND_BE))) { - - if (Op1.getOpcode() == ISD::SUB) { - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i32); - SDValue New = DAG.getNode(X86ISD::SUB, DL, VTs, - Op1.getOperand(0), Op1.getOperand(1)); - DAG.ReplaceAllUsesWith(Op1, New); - Op1 = New; - } - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - unsigned NewCC = (CC == X86::COND_G || CC == X86::COND_GE || - CC == X86::COND_L || - CC == X86::COND_LE) ? X86::COND_GE : X86::COND_AE; - SDValue Ops[] = { Op2, Op1, DAG.getConstant(NewCC, MVT::i8), - SDValue(Op1.getNode(), 1) }; - return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); - } - } - // (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y // (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y // (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y @@ -8788,11 +8871,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb if (ConstantSDNode *YC = dyn_cast<ConstantSDNode>(Y)) - if (YC->isNullValue() && + if (YC->isNullValue() && (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); - SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, - DAG.getConstant(0, CmpOp0.getValueType()), + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, + DAG.getConstant(0, CmpOp0.getValueType()), CmpOp0); SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, MVT::i8), @@ -8883,9 +8966,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look pass the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -8908,7 +8991,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // a < b ? 0 : -1 -> RES = setcc_carry // a >= b ? -1 : 0 -> RES = setcc_carry // a >= b ? 0 : -1 -> RES = ~setcc_carry - if (Cond.getOpcode() == X86ISD::CMP) { + if (Cond.getOpcode() == X86ISD::SUB) { Cond = ConvertCmpIfNecessary(Cond, DAG); unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); @@ -9192,9 +9275,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate. - if (Cond.getOpcode() == ISD::TRUNCATE) - Cond = Cond.getOperand(0); + // Look pass the truncate if the high bits are known zero. + if (isTruncWithZeroHighBitsInput(Cond, DAG)) + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. @@ -9459,8 +9542,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue ShOps[4]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); + ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); // The return type has to be a 128-bit type with the same element @@ -9503,8 +9585,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_sse2_ucomigt_sd: case Intrinsic::x86_sse2_ucomige_sd: case Intrinsic::x86_sse2_ucomineq_sd: { - unsigned Opc = 0; - ISD::CondCode CC = ISD::SETCC_INVALID; + unsigned Opc; + ISD::CondCode CC; switch (IntNo) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_sse_comieq_ss: @@ -9578,55 +9660,102 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(X86CC, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + // Arithmetic intrinsics. case Intrinsic::x86_sse2_pmulu_dq: case Intrinsic::x86_avx2_pmulu_dq: return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + + // SSE3/AVX horizontal add/sub intrinsics case Intrinsic::x86_sse3_hadd_ps: case Intrinsic::x86_sse3_hadd_pd: case Intrinsic::x86_avx_hadd_ps_256: case Intrinsic::x86_avx_hadd_pd_256: - return DAG.getNode(X86ISD::FHADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse3_hsub_ps: case Intrinsic::x86_sse3_hsub_pd: case Intrinsic::x86_avx_hsub_ps_256: case Intrinsic::x86_avx_hsub_pd_256: - return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phadd_w_128: case Intrinsic::x86_ssse3_phadd_d_128: case Intrinsic::x86_avx2_phadd_w: case Intrinsic::x86_avx2_phadd_d: - return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_ssse3_phsub_w_128: case Intrinsic::x86_ssse3_phsub_d_128: case Intrinsic::x86_avx2_phsub_w: - case Intrinsic::x86_avx2_phsub_d: - return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(), + case Intrinsic::x86_avx2_phsub_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse3_hadd_ps: + case Intrinsic::x86_sse3_hadd_pd: + case Intrinsic::x86_avx_hadd_ps_256: + case Intrinsic::x86_avx_hadd_pd_256: + Opcode = X86ISD::FHADD; + break; + case Intrinsic::x86_sse3_hsub_ps: + case Intrinsic::x86_sse3_hsub_pd: + case Intrinsic::x86_avx_hsub_ps_256: + case Intrinsic::x86_avx_hsub_pd_256: + Opcode = X86ISD::FHSUB; + break; + case Intrinsic::x86_ssse3_phadd_w_128: + case Intrinsic::x86_ssse3_phadd_d_128: + case Intrinsic::x86_avx2_phadd_w: + case Intrinsic::x86_avx2_phadd_d: + Opcode = X86ISD::HADD; + break; + case Intrinsic::x86_ssse3_phsub_w_128: + case Intrinsic::x86_ssse3_phsub_d_128: + case Intrinsic::x86_avx2_phsub_w: + case Intrinsic::x86_avx2_phsub_d: + Opcode = X86ISD::HSUB; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // AVX2 variable shift intrinsics case Intrinsic::x86_avx2_psllv_d: case Intrinsic::x86_avx2_psllv_q: case Intrinsic::x86_avx2_psllv_d_256: case Intrinsic::x86_avx2_psllv_q_256: - return DAG.getNode(ISD::SHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrlv_d: case Intrinsic::x86_avx2_psrlv_q: case Intrinsic::x86_avx2_psrlv_d_256: case Intrinsic::x86_avx2_psrlv_q_256: - return DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_avx2_psrav_d: - case Intrinsic::x86_avx2_psrav_d_256: - return DAG.getNode(ISD::SRA, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_avx2_psrav_d_256: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx2_psllv_q_256: + Opcode = ISD::SHL; + break; + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx2_psrlv_q_256: + Opcode = ISD::SRL; + break; + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + Opcode = ISD::SRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: case Intrinsic::x86_ssse3_psign_w_128: case Intrinsic::x86_ssse3_psign_d_128: @@ -9635,15 +9764,18 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psign_d: return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_insertps: return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_pd_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: // Operands intentionally swapped. Mask is last operand to intrinsic, @@ -9673,7 +9805,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx_vtestc_pd_256: case Intrinsic::x86_avx_vtestnzc_pd_256: { bool IsTestPacked = false; - unsigned X86CC = 0; + unsigned X86CC; switch (IntNo) { default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); case Intrinsic::x86_avx_vtestz_ps: @@ -9724,44 +9856,93 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_avx2_psll_w: case Intrinsic::x86_avx2_psll_d: case Intrinsic::x86_avx2_psll_q: - return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_sse2_psrl_d: case Intrinsic::x86_sse2_psrl_q: case Intrinsic::x86_avx2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: - return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_psra_w: case Intrinsic::x86_sse2_psra_d: case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: - return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psra_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + Opcode = X86ISD::VSHL; + break; + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + Opcode = X86ISD::VSRL; + break; + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + Opcode = X86ISD::VSRA; + break; + } + return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + } + + // SSE/AVX immediate shift intrinsics case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: case Intrinsic::x86_avx2_pslli_w: case Intrinsic::x86_avx2_pslli_d: case Intrinsic::x86_avx2_pslli_q: - return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_avx2_psrli_w: case Intrinsic::x86_avx2_psrli_d: case Intrinsic::x86_avx2_psrli_q: - return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), - Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(), + case Intrinsic::x86_avx2_psrai_d: { + unsigned Opcode; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + Opcode = X86ISD::VSHLI; + break; + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + Opcode = X86ISD::VSRLI; + break; + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + Opcode = X86ISD::VSRAI; + break; + } + return getTargetVShiftNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), DAG); + } + // Fix vector shift instructions where the last operand is a non-immediate // i32 value. case Intrinsic::x86_mmx_pslli_w: @@ -9776,8 +9957,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const if (isa<ConstantSDNode>(ShAmt)) return SDValue(); - unsigned NewIntNo = 0; + unsigned NewIntNo; switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::x86_mmx_pslli_w: NewIntNo = Intrinsic::x86_mmx_psll_w; break; @@ -9802,7 +9984,6 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_mmx_psrai_d: NewIntNo = Intrinsic::x86_mmx_psra_d; break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but @@ -9818,6 +9999,84 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(NewIntNo, MVT::i32), Op.getOperand(1), ShAmt); } + case Intrinsic::x86_sse42_pcmpistria128: + case Intrinsic::x86_sse42_pcmpestria128: + case Intrinsic::x86_sse42_pcmpistric128: + case Intrinsic::x86_sse42_pcmpestric128: + case Intrinsic::x86_sse42_pcmpistrio128: + case Intrinsic::x86_sse42_pcmpestrio128: + case Intrinsic::x86_sse42_pcmpistris128: + case Intrinsic::x86_sse42_pcmpestris128: + case Intrinsic::x86_sse42_pcmpistriz128: + case Intrinsic::x86_sse42_pcmpestriz128: { + unsigned Opcode; + unsigned X86CC; + switch (IntNo) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::x86_sse42_pcmpistria128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpestria128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_A; + break; + case Intrinsic::x86_sse42_pcmpistric128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpestric128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_B; + break; + case Intrinsic::x86_sse42_pcmpistrio128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpestrio128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_O; + break; + case Intrinsic::x86_sse42_pcmpistris128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpestris128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_S; + break; + case Intrinsic::x86_sse42_pcmpistriz128: + Opcode = X86ISD::PCMPISTRI; + X86CC = X86::COND_E; + break; + case Intrinsic::x86_sse42_pcmpestriz128: + Opcode = X86ISD::PCMPESTRI; + X86CC = X86::COND_E; + break; + } + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86CC, MVT::i8), + SDValue(PCMP.getNode(), 1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } + + case Intrinsic::x86_sse42_pcmpistri128: + case Intrinsic::x86_sse42_pcmpestri128: { + unsigned Opcode; + if (IntNo == Intrinsic::x86_sse42_pcmpistri128) + Opcode = X86ISD::PCMPISTRI; + else + Opcode = X86ISD::PCMPESTRI; + + SmallVector<SDValue, 5> NewOps; + NewOps.append(Op->op_begin()+1, Op->op_end()); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + } } } @@ -10231,7 +10490,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getSizeInBits() == 256 && VT.isInteger() && + assert(VT.is256BitVector() && VT.isInteger() && "Unsupported value type for operation"); unsigned NumElems = VT.getVectorNumElements(); @@ -10256,14 +10515,14 @@ static SDValue Lower256IntArith(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); } SDValue X86TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType().getSizeInBits() == 256 && + assert(Op.getValueType().is256BitVector() && Op.getValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); return Lower256IntArith(Op, DAG); @@ -10273,7 +10532,7 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) + if (VT.is256BitVector() && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); assert((VT == MVT::v2i64 || VT == MVT::v4i64) && @@ -10503,7 +10762,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } // Decompose 256-bit shifts into smaller 128-bit shifts. - if (VT.getSizeInBits() == 256) { + if (VT.is256BitVector()) { unsigned NumElems = VT.getVectorNumElements(); MVT EltVT = VT.getVectorElementType().getSimpleVT(); EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); @@ -10992,9 +11251,9 @@ static void ReplaceATOMIC_LOAD(SDNode *Node, Results.push_back(Swap.getValue(1)); } -void X86TargetLowering:: +static void ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) const { + SelectionDAG &DAG, unsigned NewOp) { DebugLoc dl = Node->getDebugLoc(); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); @@ -11092,7 +11351,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? X86::RBX : X86::EBX, swapInL, cpInH.getValue(1)); swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, - Regs64bit ? X86::RCX : X86::ECX, + Regs64bit ? X86::RCX : X86::ECX, swapInH, swapInL.getValue(1)); SDValue Ops[] = { swapInH.getValue(0), N->getOperand(1), @@ -11115,26 +11374,40 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG); - return; case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG); - return; - case ISD::ATOMIC_SWAP: - ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG); + case ISD::ATOMIC_SWAP: { + unsigned Opc; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case ISD::ATOMIC_LOAD_ADD: + Opc = X86ISD::ATOMADD64_DAG; + break; + case ISD::ATOMIC_LOAD_AND: + Opc = X86ISD::ATOMAND64_DAG; + break; + case ISD::ATOMIC_LOAD_NAND: + Opc = X86ISD::ATOMNAND64_DAG; + break; + case ISD::ATOMIC_LOAD_OR: + Opc = X86ISD::ATOMOR64_DAG; + break; + case ISD::ATOMIC_LOAD_SUB: + Opc = X86ISD::ATOMSUB64_DAG; + break; + case ISD::ATOMIC_LOAD_XOR: + Opc = X86ISD::ATOMXOR64_DAG; + break; + case ISD::ATOMIC_SWAP: + Opc = X86ISD::ATOMSWAP64_DAG; + break; + } + ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); return; + } case ISD::ATOMIC_LOAD: ReplaceATOMIC_LOAD(N, Results, DAG); } @@ -11194,6 +11467,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMAXC: return "X86ISD::FMAXC"; + case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; @@ -11212,7 +11487,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; + case X86ISD::VSEXT_MOVL: return "X86ISD::VSEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; + case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -11273,6 +11550,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::FMADD: return "X86ISD::FMADD"; + case X86ISD::FMSUB: return "X86ISD::FMSUB"; + case X86ISD::FNMADD: return "X86ISD::FNMADD"; + case X86ISD::FNMSUB: return "X86ISD::FNMSUB"; + case X86ISD::FMADDSUB: return "X86ISD::FMADDSUB"; + case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; } } @@ -11408,7 +11691,7 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, // FIXME: This collection of masks seems suspect. if (NumElts == 2) return true; - if (NumElts == 4 && VT.getSizeInBits() == 128) { + if (NumElts == 4 && VT.is128BitVector()) { return (isMOVLMask(Mask, VT) || isCommutedMOVLMask(Mask, VT, true) || isSHUFPMask(Mask, VT, Subtarget->hasAVX()) || @@ -11834,8 +12117,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, MIB.addOperand(Op); } BuildMI(*BB, MI, dl, - TII->get(Subtarget->hasAVX() ? X86::VMOVAPSrr : X86::MOVAPSrr), - MI->getOperand(0).getReg()) + TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg()) .addReg(X86::XMM0); MI->eraseFromParent(); @@ -11868,24 +12150,6 @@ X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const { } MachineBasicBlock * -X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const { - DebugLoc dl = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - // First arg in ECX, the second in EAX. - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX) - .addReg(MI->getOperand(0).getReg()); - BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX) - .addReg(MI->getOperand(1).getReg()); - - // The instruction doesn't actually take any operands though. - BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr)); - - MI->eraseFromParent(); // The pseudo is gone now. - return BB; -} - -MachineBasicBlock * X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr *MI, MachineBasicBlock *MBB) const { @@ -12675,185 +12939,208 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // String/text processing lowering. case X86::PCMPISTRM128REG: case X86::VPCMPISTRM128REG: - return EmitPCMP(MI, BB, 3, false /* in-mem */); case X86::PCMPISTRM128MEM: case X86::VPCMPISTRM128MEM: - return EmitPCMP(MI, BB, 3, true /* in-mem */); case X86::PCMPESTRM128REG: case X86::VPCMPESTRM128REG: - return EmitPCMP(MI, BB, 5, false /* in mem */); case X86::PCMPESTRM128MEM: - case X86::VPCMPESTRM128MEM: - return EmitPCMP(MI, BB, 5, true /* in mem */); + case X86::VPCMPESTRM128MEM: { + unsigned NumArgs; + bool MemArg; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::PCMPISTRM128REG: + case X86::VPCMPISTRM128REG: + NumArgs = 3; MemArg = false; break; + case X86::PCMPISTRM128MEM: + case X86::VPCMPISTRM128MEM: + NumArgs = 3; MemArg = true; break; + case X86::PCMPESTRM128REG: + case X86::VPCMPESTRM128REG: + NumArgs = 5; MemArg = false; break; + case X86::PCMPESTRM128MEM: + case X86::VPCMPESTRM128MEM: + NumArgs = 5; MemArg = true; break; + } + return EmitPCMP(MI, BB, NumArgs, MemArg); + } // Thread synchronization. case X86::MONITOR: return EmitMonitor(MI, BB); - case X86::MWAIT: - return EmitMwait(MI, BB); // Atomic Lowering. - case X86::ATOMAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr, - X86::OR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMXOR32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr, - X86::XOR32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass); - case X86::ATOMNAND32: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, - X86::AND32ri, X86::MOV32rm, - X86::LCMPXCHG32, - X86::NOT32r, X86::EAX, - &X86::GR32RegClass, true); case X86::ATOMMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr); case X86::ATOMMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr); case X86::ATOMUMIN32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr); case X86::ATOMUMAX32: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr); + case X86::ATOMMIN16: + case X86::ATOMMAX16: + case X86::ATOMUMIN16: + case X86::ATOMUMAX16: + case X86::ATOMMIN64: + case X86::ATOMMAX64: + case X86::ATOMUMIN64: + case X86::ATOMUMAX64: { + unsigned Opc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMMIN32: Opc = X86::CMOVL32rr; break; + case X86::ATOMMAX32: Opc = X86::CMOVG32rr; break; + case X86::ATOMUMIN32: Opc = X86::CMOVB32rr; break; + case X86::ATOMUMAX32: Opc = X86::CMOVA32rr; break; + case X86::ATOMMIN16: Opc = X86::CMOVL16rr; break; + case X86::ATOMMAX16: Opc = X86::CMOVG16rr; break; + case X86::ATOMUMIN16: Opc = X86::CMOVB16rr; break; + case X86::ATOMUMAX16: Opc = X86::CMOVA16rr; break; + case X86::ATOMMIN64: Opc = X86::CMOVL64rr; break; + case X86::ATOMMAX64: Opc = X86::CMOVG64rr; break; + case X86::ATOMUMIN64: Opc = X86::CMOVB64rr; break; + case X86::ATOMUMAX64: Opc = X86::CMOVA64rr; break; + // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + } + return EmitAtomicMinMaxWithCustomInserter(MI, BB, Opc); + } + + case X86::ATOMAND32: + case X86::ATOMOR32: + case X86::ATOMXOR32: + case X86::ATOMNAND32: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; break; + case X86::ATOMOR32: + RegOpc = X86::OR32rr; ImmOpc = X86::OR32ri; break; + case X86::ATOMXOR32: + RegOpc = X86::XOR32rr; ImmOpc = X86::XOR32ri; break; + case X86::ATOMNAND32: + RegOpc = X86::AND32rr; ImmOpc = X86::AND32ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV32rm, X86::LCMPXCHG32, + X86::NOT32r, X86::EAX, + &X86::GR32RegClass, Invert); + } case X86::ATOMAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr, - X86::OR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); case X86::ATOMXOR16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr, - X86::XOR16ri, X86::MOV16rm, - X86::LCMPXCHG16, - X86::NOT16r, X86::AX, - &X86::GR16RegClass); - case X86::ATOMNAND16: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr, - X86::AND16ri, X86::MOV16rm, - X86::LCMPXCHG16, + case X86::ATOMNAND16: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; break; + case X86::ATOMOR16: + RegOpc = X86::OR16rr; ImmOpc = X86::OR16ri; break; + case X86::ATOMXOR16: + RegOpc = X86::XOR16rr; ImmOpc = X86::XOR16ri; break; + case X86::ATOMNAND16: + RegOpc = X86::AND16rr; ImmOpc = X86::AND16ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV16rm, X86::LCMPXCHG16, X86::NOT16r, X86::AX, - &X86::GR16RegClass, true); - case X86::ATOMMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr); - case X86::ATOMMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr); - case X86::ATOMUMIN16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr); - case X86::ATOMUMAX16: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr); + &X86::GR16RegClass, Invert); + } case X86::ATOMAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr, - X86::OR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); case X86::ATOMXOR8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr, - X86::XOR8ri, X86::MOV8rm, - X86::LCMPXCHG8, - X86::NOT8r, X86::AL, - &X86::GR8RegClass); - case X86::ATOMNAND8: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr, - X86::AND8ri, X86::MOV8rm, - X86::LCMPXCHG8, + case X86::ATOMNAND8: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; break; + case X86::ATOMOR8: + RegOpc = X86::OR8rr; ImmOpc = X86::OR8ri; break; + case X86::ATOMXOR8: + RegOpc = X86::XOR8rr; ImmOpc = X86::XOR8ri; break; + case X86::ATOMNAND8: + RegOpc = X86::AND8rr; ImmOpc = X86::AND8ri; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV8rm, X86::LCMPXCHG8, X86::NOT8r, X86::AL, - &X86::GR8RegClass, true); - // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way. + &X86::GR8RegClass, Invert); + } + // This group is for 64-bit host. case X86::ATOMAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr, - X86::OR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); case X86::ATOMXOR64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr, - X86::XOR64ri32, X86::MOV64rm, - X86::LCMPXCHG64, - X86::NOT64r, X86::RAX, - &X86::GR64RegClass); - case X86::ATOMNAND64: - return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr, - X86::AND64ri32, X86::MOV64rm, - X86::LCMPXCHG64, + case X86::ATOMNAND64: { + bool Invert = false; + unsigned RegOpc, ImmOpc; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; break; + case X86::ATOMOR64: + RegOpc = X86::OR64rr; ImmOpc = X86::OR64ri32; break; + case X86::ATOMXOR64: + RegOpc = X86::XOR64rr; ImmOpc = X86::XOR64ri32; break; + case X86::ATOMNAND64: + RegOpc = X86::AND64rr; ImmOpc = X86::AND64ri32; Invert = true; break; + } + return EmitAtomicBitwiseWithCustomInserter(MI, BB, RegOpc, ImmOpc, + X86::MOV64rm, X86::LCMPXCHG64, X86::NOT64r, X86::RAX, - &X86::GR64RegClass, true); - case X86::ATOMMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr); - case X86::ATOMMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr); - case X86::ATOMUMIN64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr); - case X86::ATOMUMAX64: - return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr); + &X86::GR64RegClass, Invert); + } // This group does 64-bit operations on a 32-bit host. case X86::ATOMAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - false); case X86::ATOMOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::OR32rr, X86::OR32rr, - X86::OR32ri, X86::OR32ri, - false); case X86::ATOMXOR6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::XOR32rr, X86::XOR32rr, - X86::XOR32ri, X86::XOR32ri, - false); case X86::ATOMNAND6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::AND32rr, X86::AND32rr, - X86::AND32ri, X86::AND32ri, - true); case X86::ATOMADD6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::ADD32rr, X86::ADC32rr, - X86::ADD32ri, X86::ADC32ri, - false); case X86::ATOMSUB6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::SUB32rr, X86::SBB32rr, - X86::SUB32ri, X86::SBB32ri, - false); - case X86::ATOMSWAP6432: - return EmitAtomicBit6432WithCustomInserter(MI, BB, - X86::MOV32rr, X86::MOV32rr, - X86::MOV32ri, X86::MOV32ri, - false); + case X86::ATOMSWAP6432: { + bool Invert = false; + unsigned RegOpcL, RegOpcH, ImmOpcL, ImmOpcH; + switch (MI->getOpcode()) { + default: llvm_unreachable("illegal opcode!"); + case X86::ATOMAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + break; + case X86::ATOMOR6432: + RegOpcL = RegOpcH = X86::OR32rr; + ImmOpcL = ImmOpcH = X86::OR32ri; + break; + case X86::ATOMXOR6432: + RegOpcL = RegOpcH = X86::XOR32rr; + ImmOpcL = ImmOpcH = X86::XOR32ri; + break; + case X86::ATOMNAND6432: + RegOpcL = RegOpcH = X86::AND32rr; + ImmOpcL = ImmOpcH = X86::AND32ri; + Invert = true; + break; + case X86::ATOMADD6432: + RegOpcL = X86::ADD32rr; RegOpcH = X86::ADC32rr; + ImmOpcL = X86::ADD32ri; ImmOpcH = X86::ADC32ri; + break; + case X86::ATOMSUB6432: + RegOpcL = X86::SUB32rr; RegOpcH = X86::SBB32rr; + ImmOpcL = X86::SUB32ri; ImmOpcH = X86::SBB32ri; + break; + case X86::ATOMSWAP6432: + RegOpcL = RegOpcH = X86::MOV32rr; + ImmOpcL = ImmOpcH = X86::MOV32ri; + break; + } + return EmitAtomicBit6432WithCustomInserter(MI, BB, RegOpcL, RegOpcH, + ImmOpcL, ImmOpcH, Invert); + } + case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); @@ -13043,7 +13330,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, false/*WriteMem*/); return DAG.getNode(ISD::BITCAST, dl, VT, ResNode); } - } + } // Emit a zeroed vector and insert the desired subvector on its // first half. @@ -13086,12 +13373,12 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // Combine 256-bit vector shuffles. This is only profitable when in AVX mode - if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 && + if (Subtarget->hasAVX() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); // Only handle 128 wide vector from here on. - if (VT.getSizeInBits() != 128) + if (!VT.is128BitVector()) return SDValue(); // Combine a vector_shuffle that is equal to build_vector load1, load2, load3, @@ -13109,7 +13396,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, /// a sequence of vector shuffle operations. /// It is possible when we truncate 256-bit vector to 128-bit vector -SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, +SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const { if (!DCI.isBeforeLegalizeOps()) return SDValue(); @@ -13151,8 +13438,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, // PSHUFD static const int ShufMask1[] = {0, 2, 0, 0}; - OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT), ShufMask1); - OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT), ShufMask1); + SDValue Undef = DAG.getUNDEF(VT); + OpLo = DAG.getVectorShuffle(VT, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(VT, dl, OpHi, Undef, ShufMask1); // MOVLHPS static const int ShufMask2[] = {0, 1, 4, 5}; @@ -13210,10 +13498,9 @@ SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG, static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1}; - OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, DAG.getUNDEF(MVT::v16i8), - ShufMask1); - OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, DAG.getUNDEF(MVT::v16i8), - ShufMask1); + SDValue Undef = DAG.getUNDEF(MVT::v16i8); + OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo, Undef, ShufMask1); + OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi, Undef, ShufMask1); OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo); OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi); @@ -13718,6 +14005,88 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +// Check whether a boolean test is testing a boolean value generated by +// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition +// code. +// +// Simplify the following patterns: +// (Op (CMP (SETCC Cond EFLAGS) 1) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 0) NEQ) +// to (Op EFLAGS Cond) +// +// (Op (CMP (SETCC Cond EFLAGS) 0) EQ) or +// (Op (CMP (SETCC Cond EFLAGS) 1) NEQ) +// to (Op EFLAGS !Cond) +// +// where Op could be BRCOND or CMOV. +// +static SDValue BoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { + // Quit if not CMP and SUB with its value result used. + if (Cmp.getOpcode() != X86ISD::CMP && + (Cmp.getOpcode() != X86ISD::SUB || Cmp.getNode()->hasAnyUseOfValue(0))) + return SDValue(); + + // Quit if not used as a boolean value. + if (CC != X86::COND_E && CC != X86::COND_NE) + return SDValue(); + + // Check CMP operands. One of them should be 0 or 1 and the other should be + // an SetCC or extended from it. + SDValue Op1 = Cmp.getOperand(0); + SDValue Op2 = Cmp.getOperand(1); + + SDValue SetCC; + const ConstantSDNode* C = 0; + bool needOppositeCond = (CC == X86::COND_E); + + if ((C = dyn_cast<ConstantSDNode>(Op1))) + SetCC = Op2; + else if ((C = dyn_cast<ConstantSDNode>(Op2))) + SetCC = Op1; + else // Quit if all operands are not constants. + return SDValue(); + + if (C->getZExtValue() == 1) + needOppositeCond = !needOppositeCond; + else if (C->getZExtValue() != 0) + // Quit if the constant is neither 0 or 1. + return SDValue(); + + // Skip 'zext' node. + if (SetCC.getOpcode() == ISD::ZERO_EXTEND) + SetCC = SetCC.getOperand(0); + + // Quit if not SETCC. + // FIXME: So far we only handle the boolean value generated from SETCC. If + // there is other ways to generate boolean values, we need handle them here + // as well. + if (SetCC.getOpcode() != X86ISD::SETCC) + return SDValue(); + + // Set the condition code or opposite one if necessary. + CC = X86::CondCode(SetCC.getConstantOperandVal(0)); + if (needOppositeCond) + CC = X86::GetOppositeBranchCondition(CC); + + return SetCC.getOperand(1); +} + +static bool IsValidFCMOVCondition(X86::CondCode CC) { + switch (CC) { + default: + return false; + case X86::COND_B: + case X86::COND_BE: + case X86::COND_E: + case X86::COND_P: + case X86::COND_AE: + case X86::COND_A: + case X86::COND_NE: + case X86::COND_NP: + return true; + } +} + /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { @@ -13731,6 +14100,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, SDValue TrueOp = N->getOperand(1); X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); SDValue Cond = N->getOperand(3); + if (CC == X86::COND_E || CC == X86::COND_NE) { switch (Cond.getOpcode()) { default: break; @@ -13742,6 +14112,18 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, } } + SDValue Flags; + + Flags = BoolTestSetCCCombine(Cond, CC); + if (Flags.getNode() && + // Extra check as FCMOV only supports a subset of X86 cond. + (FalseOp.getValueType() != MVT::f80 || IsValidFCMOVCondition(CC))) { + SDValue Ops[] = { FalseOp, TrueOp, + DAG.getConstant(CC, MVT::i8), Flags }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), + Ops, array_lengthof(Ops)); + } + // If this is a select between two integer constants, try to do some // optimizations. Note that the operands are ordered the opposite of SELECT // operands. @@ -14164,7 +14546,7 @@ static bool CanFoldXORWithAllOnes(const SDNode *N) { // Sometimes the operand may come from a insert_subvector building a 256-bit // allones vector - if (VT.getSizeInBits() == 256 && + if (VT.is256BitVector() && N->getOpcode() == ISD::INSERT_SUBVECTOR) { SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); @@ -14609,7 +14991,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. - if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() && + if (VT.is256BitVector() && !Subtarget->hasAVX2() && StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS && StoredVal.getNumOperands() == 2) { SDValue Value0 = StoredVal.getOperand(0); @@ -14992,6 +15374,29 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +/// PerformFMinFMaxCombine - Do target-specific dag combines on X86ISD::FMIN and +/// X86ISD::FMAX nodes. +static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { + assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); + + // Only perform optimizations if UnsafeMath is used. + if (!DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + + // If we run in unsafe-math mode, then convert the FMAX and FMIN nodes + // into FMINC and MMAXC, which are Commutative operations. + unsigned NewOp = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("unknown opcode"); + case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; + case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; + } + + return DAG.getNode(NewOp, N->getDebugLoc(), N->getValueType(0), + N->getOperand(0), N->getOperand(1)); +} + + /// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes. static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { // FAND(0.0, x) -> 0.0 @@ -15067,19 +15472,19 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, // concat the vectors to original VT unsigned NumElems = OpVT.getVectorNumElements(); + SDValue Undef = DAG.getUNDEF(OpVT); + SmallVector<int,8> ShufMask1(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask1[i] = i; - SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask1[0]); + SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask1[0]); SmallVector<int,8> ShufMask2(NumElems, -1); for (unsigned i = 0; i != NumElems/2; ++i) ShufMask2[i] = i + NumElems/2; - SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - &ShufMask2[0]); + SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, Undef, &ShufMask2[0]); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), VT.getVectorNumElements()/2); @@ -15092,6 +15497,40 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasFMA()) + return SDValue(); + + SDValue A = N->getOperand(0); + SDValue B = N->getOperand(1); + SDValue C = N->getOperand(2); + + bool NegA = (A.getOpcode() == ISD::FNEG); + bool NegB = (B.getOpcode() == ISD::FNEG); + bool NegC = (C.getOpcode() == ISD::FNEG); + + // Negative multiplication when NegA xor NegB + bool NegMul = (NegA != NegB); + if (NegA) + A = A.getOperand(0); + if (NegB) + B = B.getOperand(0); + if (NegC) + C = C.getOperand(0); + + unsigned Opcode; + if (!NegMul) + Opcode = (!NegC)? X86ISD::FMADD : X86ISD::FMSUB; + else + Opcode = (!NegC)? X86ISD::FNMADD : X86ISD::FNMSUB; + return DAG.getNode(Opcode, dl, VT, A, B, C); +} + static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -15164,7 +15603,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG, static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); + SDValue RHS = N->getOperand(1); if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB) if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0))) @@ -15187,19 +15626,50 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG) { // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { - unsigned X86CC = N->getConstantOperandVal(0); - SDValue EFLAG = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); + SDValue EFLAGS = N->getOperand(1); // Materialize "setb reg" as "sbb reg,reg", since it can be extended without // a zext and produces an all-ones bit which is more useful than 0/1 in some // cases. - if (X86CC == X86::COND_B) + if (CC == X86::COND_B) return DAG.getNode(ISD::AND, DL, MVT::i8, DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, - DAG.getConstant(X86CC, MVT::i8), EFLAG), + DAG.getConstant(CC, MVT::i8), EFLAGS), DAG.getConstant(1, MVT::i8)); + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); + } + + return SDValue(); +} + +// Optimize branch condition evaluation. +// +static SDValue PerformBrCondCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + DebugLoc DL = N->getDebugLoc(); + SDValue Chain = N->getOperand(0); + SDValue Dest = N->getOperand(1); + SDValue EFLAGS = N->getOperand(3); + X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); + + SDValue Flags; + + Flags = BoolTestSetCCCombine(EFLAGS, CC); + if (Flags.getNode()) { + SDValue Cond = DAG.getConstant(CC, MVT::i8); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); + } + return SDValue(); } @@ -15408,6 +15878,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); + case X86ISD::FMIN: + case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); @@ -15417,6 +15889,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI); case ISD::SETCC: return PerformISDSETCCCombine(N, DAG); case X86ISD::SETCC: return PerformSETCCCombine(N, DAG); + case X86ISD::BRCOND: return PerformBrCondCombine(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::PALIGN: case X86ISD::UNPCKH: @@ -15431,6 +15904,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMILP: case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); + case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 78e4d75..74f5167 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -137,10 +137,6 @@ namespace llvm { /// relative displacements. WrapperRIP, - /// MOVQ2DQ - Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector /// to an MMX vector. If you think this is too close to the previous /// mnemonic, so do I; blame Intel. @@ -199,6 +195,9 @@ namespace llvm { /// FMAX, FMIN, + /// FMAXC, FMINC - Commutative FMIN and FMAX. + FMAXC, FMINC, + /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal /// approximation. Note that these typically require refinement /// in order to obtain suitable precision. @@ -231,6 +230,9 @@ namespace llvm { // VSEXT_MOVL - Vector move low and sign extend. VSEXT_MOVL, + // VFPEXT - Vector FP extend. + VFPEXT, + // VSHL, VSRL - 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -294,6 +296,14 @@ namespace llvm { // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, + // FMA nodes + FMADD, + FNMADD, + FMSUB, + FNMSUB, + FMADDSUB, + FMSUBADD, + // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, // according to %al. An operator is needed so that this can be expanded // with control flow. @@ -325,6 +335,10 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // PCMP*STRI + PCMPISTRI, + PCMPESTRI, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -597,6 +611,12 @@ namespace llvm { virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; virtual bool isZExtFree(EVT VT1, EVT VT2) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than + /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to + /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd + /// is expanded to mul + add. + virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isNarrowingProfitable - Return true if it's profitable to narrow /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow /// from i32 to i8 but not from i32 to i16. @@ -656,7 +676,8 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const; /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address @@ -813,6 +834,8 @@ namespace llvm { SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -844,9 +867,6 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const; - void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp) const; - /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. /// This takes the instruction to expand, the associated machine basic @@ -933,7 +953,8 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(FunctionLoweringInfo &funcInfo); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo); } } diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index b6ba68f..f790611 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1132,8 +1132,10 @@ defm XOR : ArithBinOp_RF<0x30, 0x32, 0x34, "xor", MRM6r, MRM6m, X86xor_flag, xor, 1, 0>; defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m, X86add_flag, add, 1, 1>; +let isCompare = 1 in { defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m, X86sub_flag, sub, 0, 0>; +} // Arithmetic. let Uses = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 0d5490a..2eb454d 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -39,12 +39,15 @@ let neverHasSideEffects = 1 in { // Sign/Zero extenders +let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; @@ -59,12 +62,15 @@ def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, TB; +let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, TB, OpSize; +let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, TB, OpSize; +} // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; @@ -82,6 +88,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class // instead of GR32. This allows them to operate on h registers on x86-64. +let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", @@ -91,6 +98,7 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, TB; +} // MOVSX64rr8 always has a REX prefix and it has an 8-bit register // operand, which makes it a rare instruction with an 8-bit register diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 8802a2e..95ee7e5 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -16,159 +16,307 @@ //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { -multiclass fma3p_rm<bits<8> opc, string OpcodeStr> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 -} +multiclass fma3p_rm<bits<8> opc, string OpcodeStr, + PatFrag MemFrag128, PatFrag MemFrag256, + ValueType OpVT128, ValueType OpVT256, + SDPatternOperator Op = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, + VR128:$src1, VR128:$src3)))]>; -// Intrinsic for 132 pattern -multiclass fma3p_rm_int<bits<8> opc, string OpcodeStr, - PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src3, VR128:$src2))]>; - def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, f128mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (Int128 VR128:$src1, (MemFrag128 addr:$src3), VR128:$src2))]>; - def rY_Int : FMA3<opc, MRMSrcReg, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, VR256:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src3, VR256:$src2))]>; - def mY_Int : FMA3<opc, MRMSrcMem, (outs VR256:$dst), - (ins VR256:$src1, VR256:$src2, f256mem:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR256:$dst, - (Int256 VR256:$src1, (MemFrag256 addr:$src3), VR256:$src2))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, f128mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, + (MemFrag128 addr:$src3))))]>; + + def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, VR256:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, + VR256:$src3)))]>; + + let mayLoad = MayLoad in + def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, f256mem:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR256:$dst, + (OpVT256 (Op VR256:$src2, VR256:$src1, + (MemFrag256 addr:$src3))))]>; } } // Constraints = "$src1 = $dst" multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, - Intrinsic Int128, Intrinsic Int256> { - defm r132 : fma3p_rm_int <opc132, !strconcat(OpcodeStr, - !strconcat("132", PackTy)), MemFrag128, MemFrag256, - Int128, Int256>; - defm r132 : fma3p_rm <opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>; - defm r213 : fma3p_rm <opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>; - defm r231 : fma3p_rm <opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>; + SDNode Op, ValueType OpTy128, ValueType OpTy256> { + defm r213 : fma3p_rm<opc213, + !strconcat(OpcodeStr, !strconcat("213", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256, Op, 0>; +let neverHasSideEffects = 1 in { + defm r132 : fma3p_rm<opc132, + !strconcat(OpcodeStr, !strconcat("132", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; + defm r231 : fma3p_rm<opc231, + !strconcat(OpcodeStr, !strconcat("231", PackTy)), + MemFrag128, MemFrag256, OpTy128, OpTy256>; +} // neverHasSideEffects = 1 } // Fused Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmadd_ps, int_x86_fma_vfmadd_ps_256>; - defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfmsub_ps, int_x86_fma_vfmsub_ps_256>; + memopv8f32, X86Fmadd, v4f32, v8f32>; + defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", memopv4f32, + memopv8f32, X86Fmsub, v4f32, v8f32>; defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmaddsub_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmaddsub, + v4f32, v8f32>; defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", - memopv4f32, memopv8f32, int_x86_fma_vfmsubadd_ps, - int_x86_fma_vfmaddsub_ps_256>; + memopv4f32, memopv8f32, X86Fmsubadd, + v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmadd_pd, int_x86_fma_vfmadd_pd_256>, VEX_W; + memopv4f64, X86Fmadd, v2f64, v4f64>, VEX_W; defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsub_pd, int_x86_fma_vfmsub_pd_256>, VEX_W; - defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmaddsub_pd, int_x86_fma_vfmaddsub_pd_256>, VEX_W; - defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfmsubadd_pd, int_x86_fma_vfmsubadd_pd_256>, VEX_W; + memopv4f64, X86Fmsub, v2f64, v4f64>, VEX_W; + defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", + memopv2f64, memopv4f64, X86Fmaddsub, + v2f64, v4f64>, VEX_W; + defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", + memopv2f64, memopv4f64, X86Fmsubadd, + v2f64, v4f64>, VEX_W; } // Fused Negative Multiply-Add let ExeDomain = SSEPackedSingle in { defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmadd_ps, int_x86_fma_vfnmadd_ps_256>; + memopv8f32, X86Fnmadd, v4f32, v8f32>; defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", memopv4f32, - memopv8f32, int_x86_fma_vfnmsub_ps, int_x86_fma_vfnmsub_ps_256>; + memopv8f32, X86Fnmsub, v4f32, v8f32>; } let ExeDomain = SSEPackedDouble in { defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmadd_pd, int_x86_fma_vfnmadd_pd_256>, VEX_W; - defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", memopv2f64, - memopv4f64, int_x86_fma_vfnmsub_pd, int_x86_fma_vfnmsub_pd_256>, VEX_W; + memopv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W; + defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", + memopv2f64, memopv4f64, X86Fnmsub, v2f64, + v4f64>, VEX_W; } +let Predicates = [HasFMA] in { + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, + (memopv4f32 addr:$src3)), + (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, + (memopv8f32 addr:$src3)), + (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3), + (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, + (memopv2f64 addr:$src3)), + (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>; + + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3), + (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>; + def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, + (memopv4f64 addr:$src3)), + (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>; + +} // Predicates = [HasFMA] let Constraints = "$src1 = $dst" in { multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC> { -let neverHasSideEffects = 1 in { - def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, RC:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; - let mayLoad = 1 in - def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, RC:$src2, x86memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>; -} // neverHasSideEffects = 1 + RegisterClass RC, ValueType OpVT, PatFrag mem_frag, + SDPatternOperator OpNode = null_frag, bit MayLoad = 1> { + def r : FMA3<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; + let mayLoad = MayLoad in + def m : FMA3<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set RC:$dst, + (OpVT (OpNode RC:$src2, RC:$src1, + (mem_frag addr:$src3))))]>; } multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic IntId> { + ComplexPattern mem_cpat, Intrinsic IntId, + RegisterClass RC> { def r_Int : FMA3<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, VR128:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, (IntId VR128:$src2, VR128:$src1, + VR128:$src3))]>; def m_Int : FMA3<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, memop:$src3), - !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId VR128:$src1, mem_cpat:$src3, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2, memop:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + [(set VR128:$dst, + (IntId VR128:$src2, VR128:$src1, mem_cpat:$src3))]>; } } // Constraints = "$src1 = $dst" multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64> { - defm SSr132 : fma3s_rm<opc132, !strconcat(OpStr, "132ss"), f32mem, FR32>; - defm SSr213 : fma3s_rm<opc213, !strconcat(OpStr, "213ss"), f32mem, FR32>; - defm SSr231 : fma3s_rm<opc231, !strconcat(OpStr, "231ss"), f32mem, FR32>; - defm SDr132 : fma3s_rm<opc132, !strconcat(OpStr, "132sd"), f64mem, FR64>, VEX_W; - defm SDr213 : fma3s_rm<opc213, !strconcat(OpStr, "213sd"), f64mem, FR64>, VEX_W; - defm SDr231 : fma3s_rm<opc231, !strconcat(OpStr, "231sd"), f64mem, FR64>, VEX_W; - defm SSr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132ss"), ssmem, - sse_load_f32, IntF32>; - defm SDr132 : fma3s_rm_int <opc132, !strconcat(OpStr, "132sd"), sdmem, - sse_load_f64, IntF64>; + string OpStr, string PackTy, Intrinsic Int, + SDNode OpNode, RegisterClass RC, ValueType OpVT, + X86MemOperand x86memop, Operand memop, PatFrag mem_frag, + ComplexPattern mem_cpat> { +let neverHasSideEffects = 1 in { + defm r132 : fma3s_rm<opc132, !strconcat(OpStr, !strconcat("132", PackTy)), + x86memop, RC, OpVT, mem_frag>; + defm r231 : fma3s_rm<opc231, !strconcat(OpStr, !strconcat("231", PackTy)), + x86memop, RC, OpVT, mem_frag>; +} + +defm r213 : fma3s_rm<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + x86memop, RC, OpVT, mem_frag, OpNode, 0>, + fma3s_rm_int<opc213, !strconcat(OpStr, !strconcat("213", PackTy)), + memop, mem_cpat, Int, RC>; +} + +multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, + string OpStr, Intrinsic IntF32, Intrinsic IntF64, + SDNode OpNode> { + defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", IntF32, OpNode, + FR32, f32, f32mem, ssmem, loadf32, sse_load_f32>; + defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", IntF64, OpNode, + FR64, f64, f64mem, sdmem, loadf64, sse_load_f64>, VEX_W; } -defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd>, VEX_LIG; -defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, + int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, + int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd>, VEX_LIG; -defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, + int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, + int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a115ab4..81b4f81 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -366,7 +366,7 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // // SDI - SSE2 instructions with XD prefix. // SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix. -// SSDI - SSE2 instructions with XS prefix. +// S2SI - SSE2 instructions with XS prefix. // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. @@ -379,10 +379,10 @@ class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; -class SSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> +class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE2]>; -class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, +class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, @@ -397,6 +397,10 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; +class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, + Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index ec030dd..ee2d3c4 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -29,6 +29,13 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; + +// Commutative and Associative FMIN and FMAX. +def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; + def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, @@ -73,14 +80,20 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; def X86vzmovly : SDNode<"X86ISD::VZEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisOpSmallerThanOp<1, 0> ]>>; def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; + SDTypeProfile<1, 1, + [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>; def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +def X86vfpext : SDNode<"X86ISD::VFPEXT", + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>]>>; + def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; @@ -125,7 +138,10 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, -SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; + SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>; + +def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; @@ -160,9 +176,26 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; -def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; -def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; -def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; +def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>; +def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>; +def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>; +def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; +def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; +def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; +def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; +def X86Fmaddsub : SDNode<"X86ISD::FMSUBADD", SDTFma>; +def X86Fmsubadd : SDNode<"X86ISD::FMADDSUB", SDTFma>; + +def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, + SDTCisVT<4, i8>]>; +def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, v16i8>, SDTCisVT<3, i32>, + SDTCisVT<4, v16i8>, SDTCisVT<5, i32>, + SDTCisVT<6, i8>]>; + +def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; +def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; //===----------------------------------------------------------------------===// // SSE Complex Patterns diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 69493bc..459f01a 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -414,12 +414,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 }, { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 }, { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 }, @@ -680,6 +674,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL64rr, X86::IMUL64rm, 0 }, { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, + { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 }, + { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, + { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, + { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, + { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, + { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, TB_ALIGN_16 }, { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, @@ -1130,8 +1130,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSDr132r, X86::VFMADDSDr132m, 0 }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, 0 }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, 0 }, - { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, 0 }, - { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, 0 }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, 0 }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, 0 }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_16 }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_16 }, @@ -1145,10 +1145,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_32 }, { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_32 }, { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDPSr132r_Int, X86::VFMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPDr132r_Int, X86::VFMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDPSr132rY_Int, X86::VFMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDPDr132rY_Int, X86::VFMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, 0 }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, 0 }, @@ -1156,8 +1152,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, 0 }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, 0 }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, 0 }, - { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, 0 }, - { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, 0 }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, 0 }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, 0 }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_16 }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_16 }, @@ -1171,10 +1167,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_32 }, { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_32 }, { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_32 }, - { X86::VFNMADDPSr132r_Int, X86::VFNMADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPDr132r_Int, X86::VFNMADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMADDPSr132rY_Int, X86::VFNMADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMADDPDr132rY_Int, X86::VFNMADDPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, 0 }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, 0 }, @@ -1182,8 +1174,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, 0 }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, 0 }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, 0 }, - { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, 0 }, - { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, 0 }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, 0 }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, 0 }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_16 }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_16 }, @@ -1197,10 +1189,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBPSr132r_Int, X86::VFMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPDr132r_Int, X86::VFMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBPSr132rY_Int, X86::VFMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBPDr132rY_Int, X86::VFMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, 0 }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, 0 }, @@ -1208,8 +1196,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, 0 }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, 0 }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, 0 }, - { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, 0 }, - { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, 0 }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, 0 }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, 0 }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_16 }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_16 }, @@ -1223,10 +1211,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_32 }, { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_32 }, { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFNMSUBPSr132r_Int, X86::VFNMSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPDr132r_Int, X86::VFNMSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFNMSUBPSr132rY_Int, X86::VFNMSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFNMSUBPDr132rY_Int, X86::VFNMSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_16 }, { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_16 }, @@ -1240,10 +1224,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_32 }, { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_32 }, { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_32 }, - { X86::VFMADDSUBPSr132r_Int, X86::VFMADDSUBPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPDr132r_Int, X86::VFMADDSUBPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMADDSUBPSr132rY_Int, X86::VFMADDSUBPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMADDSUBPDr132rY_Int, X86::VFMADDSUBPDr132mY_Int, TB_ALIGN_32 }, { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_16 }, { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_16 }, @@ -1257,10 +1237,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_32 }, { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_32 }, { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_32 }, - { X86::VFMSUBADDPSr132r_Int, X86::VFMSUBADDPSr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPDr132r_Int, X86::VFMSUBADDPDr132m_Int, TB_ALIGN_16 }, - { X86::VFMSUBADDPSr132rY_Int, X86::VFMSUBADDPSr132mY_Int, TB_ALIGN_32 }, - { X86::VFMSUBADDPDr132rY_Int, X86::VFMSUBADDPDr132mY_Int, TB_ALIGN_32 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) { @@ -1318,8 +1294,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); switch (MI.getOpcode()) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::MOVSX16rr8: case X86::MOVZX16rr8: case X86::MOVSX32rr8: @@ -1463,6 +1438,9 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { + // Don't waste compile time scanning use-def chains of physregs. + if (!TargetRegisterInfo::isVirtualRegister(BaseReg)) + return false; bool isPICBase = false; for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), E = MRI.def_end(); I != E; ++I) { @@ -1480,78 +1458,69 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const { switch (MI->getOpcode()) { default: break; - case X86::MOV8rm: - case X86::MOV16rm: - case X86::MOV32rm: - case X86::MOV64rm: - case X86::LD_Fp64m: - case X86::MOVSSrm: - case X86::MOVSDrm: - case X86::MOVAPSrm: - case X86::MOVUPSrm: - case X86::MOVAPDrm: - case X86::MOVDQArm: - case X86::VMOVSSrm: - case X86::VMOVSDrm: - case X86::VMOVAPSrm: - case X86::VMOVUPSrm: - case X86::VMOVAPDrm: - case X86::VMOVDQArm: - case X86::VMOVAPSYrm: - case X86::VMOVUPSYrm: - case X86::VMOVAPDYrm: - case X86::VMOVDQAYrm: - case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: - case X86::FsVMOVAPSrm: - case X86::FsVMOVAPDrm: - case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { - // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0 || BaseReg == X86::RIP) - return true; - // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) - return false; - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - bool isPICBase = false; - for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg), - E = MRI.def_end(); I != E; ++I) { - MachineInstr *DefMI = I.getOperand().getParent(); - if (DefMI->getOpcode() != X86::MOVPC32r) - return false; - assert(!isPICBase && "More than one PIC base?"); - isPICBase = true; - } - return isPICBase; - } - return false; + case X86::MOV8rm: + case X86::MOV16rm: + case X86::MOV32rm: + case X86::MOV64rm: + case X86::LD_Fp64m: + case X86::MOVSSrm: + case X86::MOVSDrm: + case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVAPDrm: + case X86::MOVDQArm: + case X86::VMOVSSrm: + case X86::VMOVSDrm: + case X86::VMOVAPSrm: + case X86::VMOVUPSrm: + case X86::VMOVAPDrm: + case X86::VMOVDQArm: + case X86::VMOVAPSYrm: + case X86::VMOVUPSYrm: + case X86::VMOVAPDYrm: + case X86::VMOVDQAYrm: + case X86::MMX_MOVD64rm: + case X86::MMX_MOVQ64rm: + case X86::FsVMOVAPSrm: + case X86::FsVMOVAPDrm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { + // Loads from constant pools are trivially rematerializable. + if (MI->getOperand(1).isReg() && + MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + MI->isInvariantLoad(AA)) { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0 || BaseReg == X86::RIP) + return true; + // Allow re-materialization of PIC load. + if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + return false; + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); } + return false; + } - case X86::LEA32r: - case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { - // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) - return true; - unsigned BaseReg = MI->getOperand(1).getReg(); - if (BaseReg == 0) - return true; - // Allow re-materialization of lea PICBase + x. - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - return regIsPICBase(BaseReg, MRI); - } - return false; - } + case X86::LEA32r: + case X86::LEA64r: { + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + !MI->getOperand(4).isReg()) { + // lea fi#, lea GV, etc. are all rematerializable. + if (!MI->getOperand(1).isReg()) + return true; + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == 0) + return true; + // Allow re-materialization of lea PICBase + x. + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + return regIsPICBase(BaseReg, MRI); + } + return false; + } } // All other instructions marked M_REMATERIALIZABLE are always trivially @@ -1660,7 +1629,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV64r0: { if (!isSafeToClobberEFLAGS(MBB, I)) { switch (Opc) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; @@ -1733,8 +1702,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); switch (MIOpc) { - default: - llvm_unreachable(0); + default: llvm_unreachable("Unreachable!"); case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) @@ -2126,57 +2094,25 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI->getOperand(3).setImm(Size-Amt); return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); } - case X86::CMOVB16rr: - case X86::CMOVB32rr: - case X86::CMOVB64rr: - case X86::CMOVAE16rr: - case X86::CMOVAE32rr: - case X86::CMOVAE64rr: - case X86::CMOVE16rr: - case X86::CMOVE32rr: - case X86::CMOVE64rr: - case X86::CMOVNE16rr: - case X86::CMOVNE32rr: - case X86::CMOVNE64rr: - case X86::CMOVBE16rr: - case X86::CMOVBE32rr: - case X86::CMOVBE64rr: - case X86::CMOVA16rr: - case X86::CMOVA32rr: - case X86::CMOVA64rr: - case X86::CMOVL16rr: - case X86::CMOVL32rr: - case X86::CMOVL64rr: - case X86::CMOVGE16rr: - case X86::CMOVGE32rr: - case X86::CMOVGE64rr: - case X86::CMOVLE16rr: - case X86::CMOVLE32rr: - case X86::CMOVLE64rr: - case X86::CMOVG16rr: - case X86::CMOVG32rr: - case X86::CMOVG64rr: - case X86::CMOVS16rr: - case X86::CMOVS32rr: - case X86::CMOVS64rr: - case X86::CMOVNS16rr: - case X86::CMOVNS32rr: - case X86::CMOVNS64rr: - case X86::CMOVP16rr: - case X86::CMOVP32rr: - case X86::CMOVP64rr: - case X86::CMOVNP16rr: - case X86::CMOVNP32rr: - case X86::CMOVNP64rr: - case X86::CMOVO16rr: - case X86::CMOVO32rr: - case X86::CMOVO64rr: - case X86::CMOVNO16rr: - case X86::CMOVNO32rr: - case X86::CMOVNO64rr: { - unsigned Opc = 0; + case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: + case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: + case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: + case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: + case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr: + case X86::CMOVA16rr: case X86::CMOVA32rr: case X86::CMOVA64rr: + case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: + case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: + case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: + case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: + case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: + case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: + case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: + case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: + case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: + case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: { + unsigned Opc; switch (MI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; @@ -2408,7 +2344,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// whether it has memory operand. static unsigned getSETFromCond(X86::CondCode CC, bool HasMemoryOperand) { - static const unsigned Opc[16][2] = { + static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, { X86::SETBr, X86::SETBm }, @@ -2435,7 +2371,7 @@ static unsigned getSETFromCond(X86::CondCode CC, /// register size in bytes, and operand type. static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, bool HasMemoryOperand) { - static const unsigned Opc[32][3] = { + static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, { X86::CMOVB16rr, X86::CMOVB32rr, X86::CMOVB64rr }, @@ -2768,19 +2704,18 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR64) -> DestReg(VR64) if (X86::GR64RegClass.contains(DestReg)) { - if (X86::VR128RegClass.contains(SrcReg)) { + if (X86::VR128RegClass.contains(SrcReg)) // Copy from a VR128 register to a GR64 register. return HasAVX ? X86::VMOVPQIto64rr : X86::MOVPQIto64rr; - } else if (X86::VR64RegClass.contains(SrcReg)) { + if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. return X86::MOVSDto64rr; - } } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128RegClass.contains(DestReg)) return HasAVX ? X86::VMOV64toPQIrr : X86::MOV64toPQIrr; // Copy from a GR64 register to a VR64 register. - else if (X86::VR64RegClass.contains(DestReg)) + if (X86::VR64RegClass.contains(DestReg)) return X86::MOV64toSDrr; } @@ -2788,12 +2723,12 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, // SrcReg(GR32) -> DestReg(FR32) if (X86::GR32RegClass.contains(DestReg) && X86::FR32RegClass.contains(SrcReg)) - // Copy from a FR32 register to a GR32 register. - return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; + // Copy from a FR32 register to a GR32 register. + return HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr; if (X86::FR32RegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg)) - // Copy from a GR32 register to a FR32 register. - return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; + // Copy from a GR32 register to a FR32 register. + return HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr; return 0; } @@ -2804,7 +2739,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { // First deal with the normal symmetric copies. bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - unsigned Opc = 0; + unsigned Opc; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; else if (X86::GR32RegClass.contains(DestReg, SrcReg)) @@ -2843,7 +2778,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); return; - } else if (X86::GR32RegClass.contains(DestReg)) { + } + if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); return; @@ -2855,7 +2791,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); return; - } else if (X86::GR32RegClass.contains(SrcReg)) { + } + if (X86::GR32RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(X86::PUSH32r)) .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); @@ -3037,6 +2974,37 @@ analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2, CmpMask = ~0; CmpValue = MI->getOperand(1).getImm(); return true; + // A SUB can be used to perform comparison. + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(2).getImm(); + return true; case X86::CMP64rr: case X86::CMP32rr: case X86::CMP16rr: @@ -3145,6 +3113,55 @@ bool X86InstrInfo:: optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const { + // Check whether we can replace SUB with CMP. + unsigned NewOpcode = 0; + switch (CmpInstr->getOpcode()) { + default: break; + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + case X86::SUB64rm: + case X86::SUB32rm: + case X86::SUB16rm: + case X86::SUB8rm: + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: { + if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) + return false; + // There is no use of the destination register, we can replace SUB with CMP. + switch (CmpInstr->getOpcode()) { + default: llvm_unreachable("Unreachable!"); + case X86::SUB64rm: NewOpcode = X86::CMP64rm; break; + case X86::SUB32rm: NewOpcode = X86::CMP32rm; break; + case X86::SUB16rm: NewOpcode = X86::CMP16rm; break; + case X86::SUB8rm: NewOpcode = X86::CMP8rm; break; + case X86::SUB64rr: NewOpcode = X86::CMP64rr; break; + case X86::SUB32rr: NewOpcode = X86::CMP32rr; break; + case X86::SUB16rr: NewOpcode = X86::CMP16rr; break; + case X86::SUB8rr: NewOpcode = X86::CMP8rr; break; + case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break; + case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break; + case X86::SUB32ri: NewOpcode = X86::CMP32ri; break; + case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break; + case X86::SUB16ri: NewOpcode = X86::CMP16ri; break; + case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break; + case X86::SUB8ri: NewOpcode = X86::CMP8ri; break; + } + CmpInstr->setDesc(get(NewOpcode)); + CmpInstr->RemoveOperand(0); + // Fall through to optimize Cmp if Cmp is CMPrr or CMPri. + if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm || + NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm) + return false; + } + } + // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; @@ -3221,12 +3238,15 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, MachineBasicBlock::iterator E = CmpInstr->getParent()->end(); for (++I; I != E; ++I) { const MachineInstr &Instr = *I; - if (Instr.modifiesRegister(X86::EFLAGS, TRI)) { + bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI); + bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI); + // We should check the usage if this instruction uses and updates EFLAGS. + if (!UseEFLAGS && ModifyEFLAGS) { // It is safe to remove CmpInstr if EFLAGS is updated again. IsSafe = true; break; } - if (!Instr.readsRegister(X86::EFLAGS, TRI)) + if (!UseEFLAGS && !ModifyEFLAGS) continue; // EFLAGS is used by this instruction. @@ -3281,7 +3301,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // instructions will be modified. OpsToUpdate.push_back(std::make_pair(&*I, NewOpc)); } - if (Instr.killsRegister(X86::EFLAGS, TRI)) { + if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { + // It is safe to remove CmpInstr if EFLAGS is updated again or killed. IsSafe = true; break; } @@ -3319,6 +3340,81 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, return true; } +/// optimizeLoadInstr - Try to remove the load by folding it to a register +/// operand at the use. We fold the load instructions if load defines a virtual +/// register, the virtual register is used once in the same BB, and the +/// instructions in-between do not load or store, and have no side effects. +MachineInstr* X86InstrInfo:: +optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const { + if (FoldAsLoadDefReg == 0) + return 0; + // To be conservative, if there exists another load, clear the load candidate. + if (MI->mayLoad()) { + FoldAsLoadDefReg = 0; + return 0; + } + + // Check whether we can move DefMI here. + DefMI = MRI->getVRegDef(FoldAsLoadDefReg); + assert(DefMI); + bool SawStore = false; + if (!DefMI->isSafeToMove(this, 0, SawStore)) + return 0; + + // We try to commute MI if possible. + unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; + for (unsigned Idx = 0; Idx < IdxEnd; Idx++) { + // Collect information about virtual register operands of MI. + unsigned SrcOperandId = 0; + bool FoundSrcOperand = false; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg != FoldAsLoadDefReg) + continue; + // Do not fold if we have a subreg use or a def or multiple uses. + if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) + return 0; + + SrcOperandId = i; + FoundSrcOperand = true; + } + if (!FoundSrcOperand) return 0; + + // Check whether we can fold the def into SrcOperandId. + SmallVector<unsigned, 8> Ops; + Ops.push_back(SrcOperandId); + MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI); + if (FoldMI) { + FoldAsLoadDefReg = 0; + return FoldMI; + } + + if (Idx == 1) { + // MI was changed but it didn't help, commute it back! + commuteInstruction(MI, false); + return 0; + } + + // Check whether we can commute MI and enable folding. + if (MI->isCommutable()) { + MachineInstr *NewMI = commuteInstruction(MI, false); + // Unable to commute. + if (!NewMI) return 0; + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + return 0; + } + } + } + return 0; +} + /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr /// instruction with two undef reads of the register being defined. This is /// used for mapping: @@ -3477,6 +3573,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, OpcodeTablePtr = &RegOp2MemOpTable1; } else if (i == 2) { OpcodeTablePtr = &RegOp2MemOpTable2; + } else if (i == 3) { + OpcodeTablePtr = &RegOp2MemOpTable3; } // If table selected... @@ -3947,7 +4045,6 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. - unsigned NewOpc = 0; switch (DataMI->getOpcode()) { default: break; case X86::CMP64ri32: @@ -3960,8 +4057,9 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, MachineOperand &MO0 = DataMI->getOperand(0); MachineOperand &MO1 = DataMI->getOperand(1); if (MO1.getImm() == 0) { + unsigned NewOpc; switch (DataMI->getOpcode()) { - default: break; + default: llvm_unreachable("Unreachable!"); case X86::CMP64ri8: case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; case X86::CMP32ri8: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index ec9b2e6..b6f69af 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -387,6 +387,18 @@ public: unsigned SrcReg2, int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + /// optimizeLoadInstr - Try to remove the load by folding it to a register + /// operand at the use. We fold the load instructions if and only if the + /// def and use are in the same BB. We only look at one load and see + /// whether it can be folded into MI. FoldAsLoadDefReg is the virtual register + /// defined by the load we are trying to fold. DefMI returns the machine + /// instruction that defines FoldAsLoadDefReg, and the function returns + /// the machine instruction generated due to folding. + virtual MachineInstr* optimizeLoadInstr(MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &FoldAsLoadDefReg, + MachineInstr *&DefMI) const; + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index e4edd36..c8f40bb 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -251,7 +251,7 @@ def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (iPTR 0))))))], IIC_MMX_MOVQ_RR>; -def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), +def MMX_MOVQ2DQrr : S2SIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector @@ -259,7 +259,7 @@ def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), IIC_MMX_MOVQ_RR>; let neverHasSideEffects = 1 in -def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), +def MMX_MOVQ2FR64rr: S2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; @@ -554,20 +554,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src), (int_x86_mmx_pmovmskb VR64:$src))]>; -// MMX to XMM for vector types -def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, - [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; - -def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)), - (v2i64 (MMX_MOVQ2DQrr VR64:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))), - (v2i64 (MOVQI2PQIrm addr:$src))>; - -def : Pat<(v2i64 (MMX_X86movq2dq - (x86mmx (scalar_to_vector (loadi32 addr:$src))))), - (v2i64 (MOVDI2PDIrm addr:$src))>; - // Low word of XMM to MMX. def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c2d169a..220c06d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -245,9 +245,9 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, // A vector extract of the first f32/f64 position is a subregister copy def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position // is a subregister copy that needs no instruction. @@ -283,14 +283,14 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (i32 0)), // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; def : Pat<(v8f32 (scalar_to_vector FR32:$src)), - (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; + (COPY_TO_REGCLASS FR32:$src, VR128)>; // Implicitly promote a 64-bit scalar to a vector. def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; def : Pat<(v4f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; + (COPY_TO_REGCLASS FR64:$src, VR128)>; // Bitcasts between 128-bit vector types. Return the original type since // no instruction is needed for the conversion @@ -562,59 +562,57 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (VMOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4f32 (V_SET0)), - (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4f32 (V_SET0)), + (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>; + (VMOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>; } let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; // MOVSDrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; // Represent the same patterns above but in the form they appear for // 256-bit types def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>; + (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, (v2f64 (scalar_to_vector (loadf64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; } def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, (v4f32 (scalar_to_vector FR32:$src)), (i32 0)))), @@ -628,70 +626,68 @@ let Predicates = [HasAVX] in { sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))), - (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>; + (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; // Move low f64 and clear high bits. def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2f64 (V_SET0)), - (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2f64 (V_SET0)), + (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>; def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (v2i64 (V_SET0)), - (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>; + (VMOVSDrr (v2i64 (V_SET0)), + (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>; // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (VMOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), (VMOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; // 256-bit variants def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss), - (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>; + (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), + sub_xmm)>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // 256-bit variants def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), + sub_xmm)>; def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), (SUBREG_TO_REG (i32 0), - (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd), - (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>; + (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), + (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), + sub_xmm)>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem @@ -699,17 +695,13 @@ let Predicates = [HasAVX] in { // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), - sub_sd))>; + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } let Predicates = [HasSSE1] in { @@ -719,37 +711,31 @@ let Predicates = [HasSSE1] in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>; + (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), - (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>; + (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; } let AddedComplexity = 20 in { - // MOVSSrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSSrm already zeros the high parts of the register. def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; + (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSSmr addr:$dst, - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; + (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; // Shuffle with MOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4i32 VR128:$src1), - (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr (v4f32 VR128:$src1), - (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; } let Predicates = [HasSSE2] in { @@ -761,50 +747,46 @@ let Predicates = [HasSSE2] in { } let AddedComplexity = 20 in { - // MOVSDrm zeros the high parts of the register; represent this - // with SUBREG_TO_REG. + // MOVSDrm already zeros the high parts of the register. def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; + (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; } // Extract and store. def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; // Shuffle with MOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2i64 VR128:$src1), - (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>; + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; } //===----------------------------------------------------------------------===// @@ -1416,14 +1398,15 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm, Domain d, OpndItins itins> { + X86MemOperand x86memop, string asm, Domain d, + OpndItins itins> { +let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr, d>; + [], itins.rr, d>; + let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm, d>; + [], itins.rm, d>; +} } multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1443,7 +1426,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, - "cvttss2si\t{$src, $dst|$dst, $src}", + "cvttss2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, @@ -1451,7 +1434,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, - "cvttsd2si\t{$src, $dst|$dst, $src}", + "cvttsd2si{q}\t{$src, $dst|$dst, $src}", SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; @@ -1465,11 +1448,14 @@ defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS, VEX_4V, VEX_W, VEX_LIG; defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD, VEX_4V, VEX_LIG; -defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, - XD, VEX_4V, VEX_LIG; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W, VEX_LIG; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrr FR64:$dst, FR64:$src1, GR32:$src)>; +def : InstAlias<"vcvtsi2sd{l}\t{$src, $src1, $dst|$dst, $src1, $src}", + (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; + let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -1519,14 +1505,14 @@ defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, // and/or XMM operand(s). multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + Intrinsic Int, Operand memop, ComplexPattern mem_cpat, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1548,30 +1534,31 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, itins.rm>; } -defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; +defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, + int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si{l}", + SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si", - SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si{q}", + SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; + sdmem, sse_load_f64, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD; defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, - f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + sdmem, sse_load_f64, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", SSE_CVT_Scalar, 0>, XS, VEX_4V; defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W; defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", SSE_CVT_Scalar, 0>, XD, VEX_4V; defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W; @@ -1587,96 +1574,71 @@ let Constraints = "$src1 = $dst" in { "cvtsi2sd", SSE_CVT_Scalar>, XD; defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, int_x86_sse2_cvtsi642sd, i64mem, loadi64, - "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W; + "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; } /// SSE 1 Only // Aliases for intrinsics defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si", SSE_CVT_SS2SI_64>, - XS, VEX, VEX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, + XS, VEX, VEX_W; defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD, VEX; defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si", SSE_CVT_SD2SI>, - XD, VEX, VEX_W; + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, + XD, VEX, VEX_W; defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, - f32mem, load, "cvttss2si", + ssmem, sse_load_f32, "cvttss2si", SSE_CVT_SS2SI_32>, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse_cvttss2si64, f32mem, load, - "cvttss2si{q}", SSE_CVT_SS2SI_64>, - XS, REX_W; + int_x86_sse_cvttss2si64, ssmem, sse_load_f32, + "cvttss2si{q}", SSE_CVT_SS2SI_64>, XS, REX_W; defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, - f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>, - XD; + sdmem, sse_load_f64, "cvttsd2si", + SSE_CVT_SD2SI>, XD; defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, - int_x86_sse2_cvttsd2si64, f128mem, load, - "cvttsd2si{q}", SSE_CVT_SD2SI>, - XD, REX_W; - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; -defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, - "cvtss2si\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load, + int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, + "cvttsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W; + +defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; +defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; + +defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + ssmem, sse_load_f32, "cvtss2si{l}", + SSE_CVT_SS2SI_32>, XS; +defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, + ssmem, sse_load_f32, "cvtss2si{q}", + SSE_CVT_SS2SI_64>, XS, REX_W; + +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load, + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, "vcvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, VEX, - Requires<[HasAVX]>; -} - -let Pattern = []<dag>, neverHasSideEffects = 1 in { -defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, - "cvtss2si{l}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_32>, XS; -defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/, - "cvtss2si{q}\t{$src, $dst|$dst, $src}", - SSE_CVT_SS2SI_64>, XS, REX_W; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, - "cvtdq2ps\t{$src, $dst|$dst, $src}", - SSEPackedSingle, SSE_CVT_PS>, TB, - Requires<[HasSSE2]>; -} + SSEPackedSingle, SSE_CVT_PS>, + TB, VEX, Requires<[HasAVX]>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (VCVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (VCVTSS2SI64rm addr:$src)>; -} - -let Predicates = [HasSSE1] in { - def : Pat<(int_x86_sse_cvtss2si VR128:$src), - (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si (load addr:$src)), - (CVTSS2SIrm addr:$src)>; - def : Pat<(int_x86_sse_cvtss2si64 VR128:$src), - (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)), - (CVTSS2SI64rm addr:$src)>; -} +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle, SSE_CVT_PS>, + TB, Requires<[HasSSE2]>; /// SSE 2 Only // Convert scalar double to scalar single +let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], @@ -1687,6 +1649,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; +} def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -1702,17 +1665,37 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), XD, Requires<[HasSSE2, OptForSize]>; -defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar, 0>, - XS, VEX_4V; -let Constraints = "$src1 = $dst" in -defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, - int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", - SSE_CVT_Scalar>, XS; +def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; +def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + +let Constraints = "$src1 = $dst" in { +def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[HasSSE2]>; +def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtsd2ss + VR128:$src1, sse_load_f64:$src2))], + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasSSE2]>; +} // Convert scalar single to scalar double // SSE2 instructions with XS prefix +let neverHasSideEffects = 1 in { def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -1724,19 +1707,21 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; +} -let Predicates = [HasAVX] in { +let AddedComplexity = 1 in { // give AVX priority def : Pat<(f64 (fextend FR32:$src)), - (VCVTSS2SDrr FR32:$src, FR32:$src)>; + (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; def : Pat<(fextend (loadf32 addr:$src)), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; - def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>; -} + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (MOVSSrm addr:$src))>, - Requires<[HasAVX, OptForSpeed]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, + Requires<[HasAVX, OptForSize]>; + def : Pat<(extloadf32 addr:$src), + (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>, + Requires<[HasAVX, OptForSpeed]>; +} // AddedComplexity = 1 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", @@ -1762,67 +1747,60 @@ def : Pat<(extloadf32 addr:$src), def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, - Requires<[HasAVX]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasSSE2]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))], - IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[HasSSE2]>; + [(set VR128:$dst, + (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[HasSSE2]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 VR256:$src))], IIC_SSE_CVT_PS_RR>, VEX; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], IIC_SSE_CVT_PS_RM>, VEX; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], IIC_SSE_CVT_PS_RR>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", [], + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], IIC_SSE_CVT_PS_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (VCVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (VCVTPS2DQrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2dq VR128:$src), - (CVTPS2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)), - (CVTPS2DQrm addr:$src)>; -} // Convert Packed Double FP to Packed DW Integers let Predicates = [HasAVX] in { @@ -1830,77 +1808,74 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + VEX; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", (VCVTPD2DQrr VR128:$dst, VR128:$src)>; def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dqx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; + "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, + VEX, VEX_L; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", [], + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (VCVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (VCVTPD2DQXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2dq VR128:$src), - (CVTPD2DQrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)), - (CVTPD2DQrm addr:$src)>; -} - // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix -def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; -def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), +def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, - (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX; -def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 - (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; - -def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; -def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; +def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + IIC_SSE_CVT_PS_RR>, VEX; +def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 + (memopv8f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>, VEX; + +def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], + IIC_SSE_CVT_PS_RR>; +def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], + IIC_SSE_CVT_PS_RM>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1952,16 +1927,6 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), (int_x86_sse2_cvttpd2dq VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; -def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; -def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), - "cvttpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttpd2dq - (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; - // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -1977,10 +1942,14 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [], + "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -1992,82 +1961,82 @@ let Predicates = [HasAVX] in { (VCVTTPD2DQYrm addr:$src)>; } // Predicates = [HasAVX] +def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], + IIC_SSE_CVT_PD_RR>; +def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memopv2f64 addr:$src)))], + IIC_SSE_CVT_PD_RM>; + // Convert packed single to packed double let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB, VEX; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], IIC_SSE_CVT_PD_RR>, TB, VEX; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtps2pd\t{$src, $dst|$dst, $src}", [], + "vcvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], IIC_SSE_CVT_PD_RM>, TB, VEX; } let Predicates = [HasSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2pd\t{$src, $dst|$dst, $src}", [], + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], IIC_SSE_CVT_PD_RR>, TB; +let neverHasSideEffects = 1, mayLoad = 1 in def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RM>, TB; } -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (VCVTPS2PDrr VR128:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtps2pd VR128:$src), - (CVTPS2PDrr VR128:$src)>; -} - // Convert Packed DW Integers to Packed Double FP let Predicates = [HasAVX] in { -def VCVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrm : SSDI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; -def VCVTDQ2PDYrr : SSDI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +let neverHasSideEffects = 1, mayLoad = 1 in +def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + []>, VEX; +def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; +def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 + (bitconvert (memopv2i64 addr:$src))))]>, VEX; +def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX; } -def CVTDQ2PDrm : SSDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), +let neverHasSideEffects = 1, mayLoad = 1 in +def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], IIC_SSE_CVT_PD_RR>; -def CVTDQ2PDrr : SSDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", [], +def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], IIC_SSE_CVT_PD_RM>; -// 128 bit register conversion intrinsics -let Predicates = [HasAVX] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (VCVTDQ2PDrr VR128:$src)>; - -let Predicates = [HasSSE2] in -def : Pat<(int_x86_sse2_cvtdq2pd VR128:$src), - (CVTDQ2PDrr VR128:$src)>; - // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { - def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src), - (VCVTDQ2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))), - (VCVTDQ2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src), - (VCVTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTPD2DQYrm addr:$src)>; - def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))), (VCVTDQ2PDYrr VR128:$src)>; def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), @@ -2079,48 +2048,44 @@ let Predicates = [HasAVX] in { // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>, VEX; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", (VCVTPD2PSrr VR128:$dst, VR128:$src)>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2psx\t{$src, $dst|$dst, $src}", [], + "cvtpd2psx\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), - "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], IIC_SSE_CVT_PD_RR>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2ps\t{$src, $dst|$dst, $src}", [], + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], IIC_SSE_CVT_PD_RM>; -let Predicates = [HasAVX] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (VCVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (VCVTPD2PSXrm addr:$src)>; -} - -let Predicates = [HasSSE2] in { - def : Pat<(int_x86_sse2_cvtpd2ps VR128:$src), - (CVTPD2PSrr VR128:$src)>; - def : Pat<(int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)), - (CVTPD2PSrm addr:$src)>; -} - // AVX 256-bit register conversion intrinsics // FIXME: Migrate SSE conversion intrinsics matching to use patterns as below // whenever possible to avoid declaring two versions of each one. @@ -2130,38 +2095,26 @@ let Predicates = [HasAVX] in { def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src), - (VCVTPD2PSYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)), - (VCVTPD2PSYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src), - (VCVTPS2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)), - (VCVTPS2DQYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src), - (VCVTPS2PDYrr VR128:$src)>; - def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)), - (VCVTPS2PDYrm addr:$src)>; - - def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src), - (VCVTTPD2DQYrr VR256:$src)>; - def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)), - (VCVTTPD2DQYrm addr:$src)>; - // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (fround (v4f64 VR256:$src))), (VCVTPD2PSYrr VR256:$src)>; def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), (VCVTPD2PSYrm addr:$src)>; + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (VCVTPS2PDrr VR128:$src)>; def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), (VCVTPS2PDYrr VR128:$src)>; def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))), (VCVTPS2PDYrm addr:$src)>; } +let Predicates = [HasSSE2] in { + // Match fextend for 128 conversions + def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), + (CVTPS2PDrr VR128:$src)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// @@ -2593,17 +2546,13 @@ let Predicates = [HasAVX] in { OpSize, VEX; def : Pat<(i32 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (VMOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>; + (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (VMOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>; + (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>; // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), @@ -2628,17 +2577,17 @@ defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", SSEPackedDouble>, TB, OpSize; def : Pat<(i32 (X86fgetsign FR32:$src)), - (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i64 (X86fgetsign FR32:$src)), - (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, - sub_ss))>, Requires<[HasSSE1]>; + (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, + Requires<[HasSSE1]>; def : Pat<(i32 (X86fgetsign FR64:$src)), - (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; def : Pat<(i64 (X86fgetsign FR64:$src)), - (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, - sub_sd))>, Requires<[HasSSE2]>; + (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, + Requires<[HasSSE2]>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -2923,7 +2872,8 @@ let isCommutable = 0 in { basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>, VEX_4V, VEX_LIG; defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V; + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, + VEX_4V; defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>, basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>, VEX_4V, VEX_LIG; @@ -2974,6 +2924,23 @@ let Constraints = "$src1 = $dst" in { } } +let isCommutable = 1, isCodeGenOnly = 1 in { + defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V; + defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>, + VEX_4V, VEX_LIG; + defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V; + let Constraints = "$src1 = $dst" in { + defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>; + defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>, + basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>; + } +} + /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -3236,34 +3203,30 @@ def : Pat<(f32 (X86frcp (load addr:$src))), let Predicates = [HasAVX], AddedComplexity = 1 in { def : Pat<(int_x86_sse_sqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse2_sqrt_sd VR128:$src), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (VSQRTSDr (f64 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd)), - sub_sd)>; + (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR64)), + VR128)>; def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; def : Pat<(int_x86_sse_rsqrt_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRSQRTSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src), (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; def : Pat<(int_x86_sse_rcp_ss VR128:$src), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), - (VRCPSSr (f32 (IMPLICIT_DEF)), - (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)), - sub_ss)>; + (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS VR128:$src, FR32)), + VR128)>; def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src), (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; } @@ -4609,7 +4572,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // Bitcast FR64 <-> GR64 // let Predicates = [HasAVX] in -def VMOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, VEX; @@ -4622,7 +4585,7 @@ def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)], IIC_SSE_MOVDQ>, VEX; -def MOV64toSDrm : SSDI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], IIC_SSE_MOVDQ>; @@ -5505,16 +5468,14 @@ let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, Requires<[HasSSE3]>; -def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2), - [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>, - Requires<[HasSSE3]>; } let Uses = [EAX, ECX, EDX] in def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>, TB, Requires<[HasSSE3]>; let Uses = [ECX, EAX] in -def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>, +def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; @@ -6906,81 +6867,42 @@ let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { } // Packed Compare Implicit Length Strings, Return Index -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { +let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpistri<string asm> { def rr : SS42AI<0x63, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, - VEX; -defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, - VEX; -defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, - VEX; -defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, - VEX; -defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, - VEX; -defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, - VEX; -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; +let Predicates = [HasAVX] in +defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX; +defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">; // Packed Compare Explicit Length Strings, Return Index -let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { + multiclass SS42AI_pcmpestri<string asm> { def rr : SS42AI<0x61, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; + let mayLoad = 1 in def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; + []>, OpSize; } } -let Predicates = [HasAVX] in { -defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, - VEX; -defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, - VEX; -defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, - VEX; -defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, - VEX; -defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, - VEX; -defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, - VEX; -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; +let Predicates = [HasAVX] in +defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX; +defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">; //===----------------------------------------------------------------------===// // SSE4.2 - CRC Instructions @@ -7727,24 +7649,18 @@ let Predicates = [HasAVX2] in { // is used by additional users, which prevents the pattern selection. let AddedComplexity = 20 in { def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), - (VBROADCASTSSYrr - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>; + (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), - (VBROADCASTSDYrr - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>; + (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; } } @@ -7768,46 +7684,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), let AddedComplexity = 20 in { // 128bit broadcasts: def : Pat<(v4f32 (X86VBroadcast FR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; def : Pat<(v8f32 (X86VBroadcast FR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; def : Pat<(v4i32 (X86VBroadcast GR32:$src)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; def : Pat<(v8i32 (X86VBroadcast GR32:$src)), (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), - 0), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; def : Pat<(v4i64 (X86VBroadcast GR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), - sub_xmm), - (VPSHUFDri - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd), - 0x44), 1)>; + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), + (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; } } @@ -8052,7 +7948,7 @@ multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, []>, VEX_4VOp3, VEX_L; } -let Constraints = "$src1 = $dst, $mask = $mask_wb" in { +let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0168d12..7ac4cec 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -532,6 +532,15 @@ uintptr_t X86JITInfo::getPICJumpTableEntry(uintptr_t BB, uintptr_t Entry) { #endif } +template<typename T> void addUnaligned(void *Pos, T Delta) { + T Value; + std::memcpy(reinterpret_cast<char*>(&Value), reinterpret_cast<char*>(Pos), + sizeof(T)); + Value += Delta; + std::memcpy(reinterpret_cast<char*>(Pos), reinterpret_cast<char*>(&Value), + sizeof(T)); +} + /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. @@ -545,24 +554,24 @@ void X86JITInfo::relocate(void *Function, MachineRelocation *MR, // PC relative relocation, add the relocated value to the value already in // memory, after we adjust it for where the PC is. ResultPtr = ResultPtr -(intptr_t)RelocPos - 4 - MR->getConstantVal(); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_picrel_word: { // PIC base relative relocation, add the relocated value to the value // already in memory, after we adjust it for where the PIC base is. ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; } case X86::reloc_absolute_word: case X86::reloc_absolute_word_sext: // Absolute relocation, just add the relocated value to the value already // in memory. - *((unsigned*)RelocPos) += (unsigned)ResultPtr; + addUnaligned<unsigned>(RelocPos, ResultPtr); break; case X86::reloc_absolute_dword: - *((intptr_t*)RelocPos) += ResultPtr; + addUnaligned<intptr_t>(RelocPos, ResultPtr); break; } } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index c76d3cc..d7c08df 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -65,7 +65,7 @@ namespace llvm { /// referenced global symbols. virtual void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs, unsigned char* GOTBase); - + /// allocateThreadLocalMemory - Each target has its own way of /// handling thread local variables. This method returns a value only /// meaningful to the target. diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index df7507c..9c0ce4e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -46,12 +46,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const { assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference"); SmallString<128> Name; - + if (!MO.isGlobal()) { assert(MO.isSymbol()); Name += MAI.getGlobalPrefix(); Name += MO.getSymbolName(); - } else { + } else { const GlobalValue *GV = MO.getGlobal(); bool isImplicitlyPrivate = false; if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB || @@ -59,7 +59,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE || MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE) isImplicitlyPrivate = true; - + Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate); } @@ -110,7 +110,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { getMachOMMI().getFnStubEntry(Sym); if (StubSym.getPointer()) return Sym; - + if (MO.isGlobal()) { StubSym = MachineModuleInfoImpl:: @@ -135,7 +135,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, // lot of extra uniquing. const MCExpr *Expr = 0; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - + switch (MO.getTargetFlags()) { default: llvm_unreachable("Unknown target flag on GV operand"); case X86II::MO_NO_FLAG: // No flag. @@ -144,7 +144,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DLLIMPORT: case X86II::MO_DARWIN_STUB: break; - + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; case X86II::MO_TLVP_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); @@ -173,7 +173,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: Expr = MCSymbolRefExpr::Create(Sym, Ctx); // Subtract the pic base. - Expr = MCBinaryExpr::CreateSub(Expr, + Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(MF.getPICBaseSymbol(), Ctx), Ctx); if (MO.isJTI() && MAI.hasSetDirective()) { @@ -187,10 +187,10 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, } break; } - + if (Expr == 0) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - + if (!MO.isJTI() && MO.getOffset()) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), @@ -211,10 +211,10 @@ static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) { // Convert registers in the addr mode according to subreg64. for (unsigned i = 0; i != 4; ++i) { if (!MI->getOperand(OpNo+i).isReg()) continue; - + unsigned Reg = MI->getOperand(OpNo+i).getReg(); if (Reg == 0) continue; - + MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64)); } } @@ -280,7 +280,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, return; // Check whether this is an absolute address. - // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // FIXME: We know TLVP symbol refs aren't, but there should be a better way // to do this here. bool Absolute = true; if (Inst.getOperand(AddrOp).isExpr()) { @@ -289,7 +289,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) Absolute = false; } - + if (Absolute && (Inst.getOperand(AddrBase + 0).getReg() != 0 || Inst.getOperand(AddrBase + 2).getReg() != 0 || @@ -306,10 +306,10 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - + MCOperand MCOp; switch (MO.getType()) { default: @@ -345,10 +345,10 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // Ignore call clobbers. continue; } - + OutMI.addOperand(MCOp); } - + // Handle a few special cases to eliminate operand modifiers. ReSimplify: switch (OutMI.getOpcode()) { @@ -425,7 +425,7 @@ ReSimplify: case X86::TAILJMPd: case X86::TAILJMPd64: Opcode = X86::JMP_1; break; } - + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); OutMI.setOpcode(Opcode); @@ -445,7 +445,7 @@ ReSimplify: case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify; case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify; case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify; - + // The assembler backend wants to see branches in their small form and relax // them to their large form. The JIT can only handle the large form because // it does not do relaxation. For now, translate the large form to the @@ -688,7 +688,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // call "L1$pb" // "L1$pb": // popl %esi - + // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); TmpInst.setOpcode(X86::CALLpcrel32); @@ -697,43 +697,43 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); OutStreamer.EmitInstruction(TmpInst); - + // Emit the label. OutStreamer.EmitLabel(PICBase); - + // popl $reg TmpInst.setOpcode(X86::POP32r); TmpInst.getOperand(0) = MCOperand::CreateReg(MI->getOperand(0).getReg()); OutStreamer.EmitInstruction(TmpInst); return; } - + case X86::ADD32ri: { // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri. if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS) break; - + // Okay, we have something like: // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL) - + // For this, we want to print something like: // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); - + // Now that we have emitted the label, lower the complex operand expression. MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2)); - + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); const MCExpr *PICBase = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), OutContext); DotExpr = MCBinaryExpr::CreateSub(DotExpr, PICBase, OutContext); - - DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), + + DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), DotExpr, OutContext); - + MCInst TmpInst; TmpInst.setOpcode(X86::ADD32ri); TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); @@ -743,7 +743,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } } - + MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); OutStreamer.EmitInstruction(TmpInst); diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h index 40df3db..b4d4cfd 100644 --- a/lib/Target/X86/X86MCInstLower.h +++ b/lib/Target/X86/X86MCInstLower.h @@ -25,7 +25,7 @@ namespace llvm { class Mangler; class TargetMachine; class X86AsmPrinter; - + /// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst. class LLVM_LIBRARY_VISIBILITY X86MCInstLower { MCContext &Ctx; @@ -37,12 +37,12 @@ class LLVM_LIBRARY_VISIBILITY X86MCInstLower { public: X86MCInstLower(Mangler *mang, const MachineFunction &MF, X86AsmPrinter &asmprinter); - + void Lower(const MachineInstr *MI, MCInst &OutMI) const; MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - + private: MachineModuleInfoMachO &getMachOMMI() const; }; diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index f83a525..78d20ce 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -24,7 +24,7 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { virtual void anchor(); /// ForceFramePointer - True if the function is required to use of frame - /// pointer for reasons other than it containing dynamic allocation or + /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. bool ForceFramePointer; @@ -83,7 +83,7 @@ public: VarArgsFPOffset(0), ArgumentStackSize(0), NumLocalDynamics(0) {} - + explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -99,7 +99,7 @@ public: ArgumentStackSize(0), NumLocalDynamics(0) {} - bool getForceFramePointer() const { return ForceFramePointer;} + bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index acf53f8..877b8f6 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -72,13 +72,15 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, SlotSize = 8; StackPtr = X86::RSP; FramePtr = X86::RBP; - BasePtr = X86::RBX; } else { SlotSize = 4; StackPtr = X86::ESP; FramePtr = X86::EBP; - BasePtr = X86::EBX; } + // Use a callee-saved register as the base pointer. These registers must + // not conflict with any ABI requirements. For example, in 32-bit mode PIC + // requires GOT in the EBX register before function calls via PLT GOT pointer. + BasePtr = Is64Bit ? X86::RBX : X86::ESI; } /// getCompactUnwindRegNum - This function maps the register to the number for @@ -366,7 +368,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { if (!EnableBasePointer) return false; - // When we need stack realignment and there are dynamic allocas, we can't + // When we need stack realignment and there are dynamic allocas, we can't // reference off of the stack pointer, so we reserve a base pointer. if (needsStackRealignment(MF) && MFI->hasVarSizedObjects()) return true; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index ae2d4d0..edc7184 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -23,9 +23,6 @@ let Namespace = "X86" in { def sub_8bit_hi : SubRegIndex; def sub_16bit : SubRegIndex; def sub_32bit : SubRegIndex; - - def sub_ss : SubRegIndex; - def sub_sd : SubRegIndex; def sub_xmm : SubRegIndex; @@ -163,8 +160,6 @@ let Namespace = "X86" in { def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. - // The sub_ss and sub_sd subregs are the same registers with another regclass. - let CompositeIndices = [(sub_ss), (sub_sd)] in { def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>; def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>; def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>; @@ -184,7 +179,7 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - }} + } // CostPerUse // YMM Registers, used by AVX instructions let SubRegIndices = [sub_xmm] in { diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h index 857becf..0333056 100644 --- a/lib/Target/X86/X86Relocations.h +++ b/lib/Target/X86/X86Relocations.h @@ -21,7 +21,7 @@ namespace llvm { /// RelocationType - An enum for the x86 relocation codes. Note that /// the terminology here doesn't follow x86 convention - word means /// 32-bit and dword means 64-bit. The relocations will be treated - /// by JIT or ObjectCode emitters, this is transparent to the x86 code + /// by JIT or ObjectCode emitters, this is transparent to the x86 code /// emitter but JIT and ObjectCode will treat them differently enum RelocationType { /// reloc_pcrel_word - PC relative relocation, add the relocated value to diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 7c6788f..00edcbc 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -38,7 +38,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); - + // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index e6e9c56..9087852 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -39,10 +39,10 @@ unsigned char X86Subtarget:: ClassifyBlockAddressReference() const { if (isPICStyleGOT()) // 32-bit ELF targets. return X86II::MO_GOTOFF; - + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. return X86II::MO_PIC_BASE_OFFSET; - + // Direct static reference to label. return X86II::MO_NO_FLAG; } @@ -69,7 +69,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Large model never uses stubs. if (TM.getCodeModel() == CodeModel::Large) return X86II::MO_NO_FLAG; - + if (isTargetDarwin()) { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current @@ -87,18 +87,18 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { return X86II::MO_NO_FLAG; } - + if (isPICStyleGOT()) { // 32-bit ELF targets. // Extra load is needed for all externally visible. if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) return X86II::MO_GOTOFF; return X86II::MO_GOT; } - + if (isPICStyleStubPIC()) { // Darwin/32 in PIC mode. // Determine whether we have a stub reference and/or whether the reference // is relative to the PIC base or not. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) @@ -108,26 +108,26 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_NONLAZY_PIC_BASE; - + // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. if (isDecl || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } - + // Otherwise, no stub. return X86II::MO_PIC_BASE_OFFSET; } - + if (isPICStyleStubNoDynamic()) { // Darwin/32 in -mdynamic-no-pic mode. // Determine whether we have a stub reference. - + // If this is a strong reference to a definition, it is definitely not // through a stub. if (!isDecl && !GV->isWeakForLinker()) return X86II::MO_NO_FLAG; - + // Unless we have a symbol with hidden visibility, we have to go through a // normal $non_lazy_ptr stub because this symbol might be resolved late. if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. @@ -136,7 +136,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // Otherwise, no stub. return X86II::MO_NO_FLAG; } - + // Direct static reference to global. return X86II::MO_NO_FLAG; } @@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } // If it's Nehalem, unaligned memory access is fast. - // FIXME: Nehalem is family 6. Also include Westmere and later processors? - if (Family == 15 && Model == 26) { + // Include Westmere and Sandy Bridge as well. + // FIXME: add later processors. + if (IsIntel && ((Family == 6 && Model == 26) || + (Family == 6 && Model == 44) || + (Family == 6 && Model == 42))) { IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } @@ -315,7 +318,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, + const std::string &FS, unsigned StackAlignOverride, bool is64Bit) : X86GenSubtargetInfo(TT, CPU, FS) , X86ProcFamily(Others) @@ -397,10 +400,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, } } - if (X86ProcFamily == IntelAtom) { + if (X86ProcFamily == IntelAtom) PostRAScheduler = true; - InstrItins = getInstrItineraryForCPU(CPUName); - } + + InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with // target data structure which is shared with MC code emitter, etc. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 1af585f..6841c5b 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -55,7 +55,7 @@ protected: /// X86ProcFamily - X86 processor family: Intel Atom, and others X86ProcFamilyEnum X86ProcFamily; - + /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; @@ -149,7 +149,7 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - + /// Instruction itineraries for scheduling InstrItineraryData InstrItins; diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index e4f567f..80b75dc 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -222,7 +222,7 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, DebugLoc dl = I->getDebugLoc(); bool isControlFlow = MI->isCall() || MI->isReturn(); - // Shortcut: don't need to check regular instructions in dirty state. + // Shortcut: don't need to check regular instructions in dirty state. if (!isControlFlow && CurState == ST_DIRTY) continue; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 3dbc3b9..a4e5647 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -371,8 +371,3 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, false)); } } - -void XCoreFrameLowering:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - -} diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index afa2773..db1bbb6 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -44,8 +44,6 @@ namespace llvm { void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; - //! Stack slot size (4 bytes) static int stackSlotSize() { return 4; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 60ce958..6d950d2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -352,7 +352,8 @@ static bool IsSafeComputationToRemove(Value *V) { return true; if (!V->hasOneUse()) return false; - if (isa<LoadInst>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) + if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) || + isa<GlobalValue>(V)) return false; if (isAllocationFn(V)) return true; @@ -442,12 +443,14 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV) { Dead[i].second->eraseFromParent(); Instruction *I = Dead[i].first; do { + if (isAllocationFn(I)) + break; Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); if (!J) break; I->eraseFromParent(); I = J; - } while (!isAllocationFn(I)); + } while (1); I->eraseFromParent(); } } diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index d8e8cf7..80bfc1c 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -27,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" @@ -175,8 +176,8 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) { // Strip any named types of their names. static void StripTypeNames(Module &M, bool PreserveDbgInfo) { - std::vector<StructType*> StructTypes; - M.findUsedStructTypes(StructTypes); + TypeFinder StructTypes; + StructTypes.run(M, false); for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { StructType *STy = StructTypes[i]; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index c1d9d01..cbe1ca4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -51,8 +51,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // if the size is something we can handle with a single primitive load/store. // A single load+store correctly handles overlapping memory in the memmove // case. - unsigned Size = MemOpLength->getZExtValue(); - if (Size == 0) return MI; // Delete this mem transfer. + uint64_t Size = MemOpLength->getLimitedValue(); + assert(Size && "0-sized memory transfering should be removed already."); if (Size > 8 || (Size&(Size-1))) return 0; // If not 1/2/4/8 bytes, exit. @@ -133,11 +133,9 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; - uint64_t Len = LenC->getZExtValue(); + uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); - - // If the length is zero, this is a no-op - if (Len == 0) return MI; // memset(d,c,0,a) -> noop + assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { @@ -795,7 +793,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { if (CI->getCalledFunction() == 0) return 0; InstCombineFortifiedLibCalls Simplifier(this); - Simplifier.fold(CI, TD); + Simplifier.fold(CI, TD, TLI); return Simplifier.NewInstruction; } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7076d88..c3fc18c 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -17,6 +17,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -2824,7 +2825,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, case ICmpInst::ICMP_UGE: // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 - if (!RHS.isNegative()) + if (RHS.isNegative()) return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); Pred = ICmpInst::ICMP_UGT; break; @@ -2985,6 +2986,44 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return Res; } break; + case Instruction::Call: { + CallInst *CI = cast<CallInst>(LHSI); + LibFunc::Func Func; + // Various optimization for fabs compared with zero. + if (RHSC->isNullValue() && CI->getCalledFunction() && + TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && + TLI->has(Func)) { + if (Func == LibFunc::fabs || Func == LibFunc::fabsf || + Func == LibFunc::fabsl) { + switch (I.getPredicate()) { + default: break; + // fabs(x) < 0 --> false + case FCmpInst::FCMP_OLT: + return ReplaceInstUsesWith(I, Builder->getFalse()); + // fabs(x) > 0 --> x != 0 + case FCmpInst::FCMP_OGT: + return new FCmpInst(FCmpInst::FCMP_ONE, CI->getArgOperand(0), + RHSC); + // fabs(x) <= 0 --> x == 0 + case FCmpInst::FCMP_OLE: + return new FCmpInst(FCmpInst::FCMP_OEQ, CI->getArgOperand(0), + RHSC); + // fabs(x) >= 0 --> !isnan(x) + case FCmpInst::FCMP_OGE: + return new FCmpInst(FCmpInst::FCMP_ORD, CI->getArgOperand(0), + RHSC); + // fabs(x) == 0 --> x == 0 + // fabs(x) != 0 --> x != 0 + case FCmpInst::FCMP_OEQ: + case FCmpInst::FCMP_UEQ: + case FCmpInst::FCMP_ONE: + case FCmpInst::FCMP_UNE: + return new FCmpInst(I.getPredicate(), CI->getArgOperand(0), + RHSC); + } + } + } + } } } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index c485844..6ecb4c5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,7 +20,154 @@ #include "llvm/ADT/Statistic.h" using namespace llvm; -STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumDeadStore, "Number of dead stores eliminated"); +STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); + +/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to +/// some part of a constant global variable. This intentionally only accepts +/// constant expressions because we can't rewrite arbitrary instructions. +static bool pointsToConstantGlobal(Value *V) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + return GV->isConstant(); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr) + return pointsToConstantGlobal(CE->getOperand(0)); + return false; +} + +/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) +/// pointer to an alloca. Ignore any reads of the pointer, return false if we +/// see any stores or other unknown uses. If we see pointer arithmetic, keep +/// track of whether it moves the pointer (with IsOffset) but otherwise traverse +/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to +/// the alloca, and if the source pointer is a pointer to a constant global, we +/// can optimize this. +static bool +isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, + SmallVectorImpl<Instruction *> &ToDelete, + bool IsOffset = false) { + // We track lifetime intrinsics as we encounter them. If we decide to go + // ahead and replace the value with the global, this lets the caller quickly + // eliminate the markers. + + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { + User *U = cast<Instruction>(*UI); + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } + + if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { + // If uses of the bitcast are ok, we are ok. + if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + return false; + continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete, + IsOffset || !GEP->hasAllZeroIndices())) + return false; + continue; + } + + if (CallSite CS = U) { + // If this is the function being called then we treat it like a load and + // ignore it. + if (CS.isCallee(UI)) + continue; + + // If this is a readonly/readnone call site, then we know it is just a + // load (but one that potentially returns the value itself), so we can + // ignore it if we know that the value isn't captured. + unsigned ArgNo = CS.getArgumentNo(UI); + if (CS.onlyReadsMemory() && + (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + continue; + + // If this is being passed as a byval argument, the caller is making a + // copy, so it is only a read of the alloca. + if (CS.isByValArgument(ArgNo)) + continue; + } + + // Lifetime intrinsics can be handled by the caller. + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) { + assert(II->use_empty() && "Lifetime markers have no result to use!"); + ToDelete.push_back(II); + continue; + } + } + + // If this is isn't our memcpy/memmove, reject it as something we can't + // handle. + MemTransferInst *MI = dyn_cast<MemTransferInst>(U); + if (MI == 0) + return false; + + // If the transfer is using the alloca as a source of the transfer, then + // ignore it since it is a load (unless the transfer is volatile). + if (UI.getOperandNo() == 1) { + if (MI->isVolatile()) return false; + continue; + } + + // If we already have seen a copy, reject the second one. + if (TheCopy) return false; + + // If the pointer has been offset from the start of the alloca, we can't + // safely handle this. + if (IsOffset) return false; + + // If the memintrinsic isn't using the alloca as the dest, reject it. + if (UI.getOperandNo() != 0) return false; + + // If the source of the memcpy/move is not a constant global, reject it. + if (!pointsToConstantGlobal(MI->getSource())) + return false; + + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } + return true; +} + +/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only +/// modified by a copy from a constant global. If we can prove this, we can +/// replace any uses of the alloca with uses of the global directly. +static MemTransferInst * +isOnlyCopiedFromConstantGlobal(AllocaInst *AI, + SmallVectorImpl<Instruction *> &ToDelete) { + MemTransferInst *TheCopy = 0; + if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) + return TheCopy; + return 0; +} + +/// getPointeeAlignment - Compute the minimum alignment of the value pointed +/// to by the given pointer. +static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::BitCast || + (CE->getOpcode() == Instruction::GetElementPtr && + cast<GEPOperator>(CE)->hasAllZeroIndices())) + return getPointeeAlignment(CE->getOperand(0), TD); + + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + if (!GV->isDeclaration()) + return TD.getPreferredAlignment(GV); + + if (PointerType *PT = dyn_cast<PointerType>(V->getType())) + return TD.getABITypeAlignment(PT->getElementType()); + + return 0; +} Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { // Ensure that the alloca array size argument has type intptr_t, so that @@ -113,6 +260,29 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } + } + // At last, use the generic allocation site handler to aggressively remove // unused allocas. return visitAllocSite(AI); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index eb9945b..291e800 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -881,12 +881,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) { if (TrueSI->getCondition() == CondVal) { + if (SI.getTrueValue() == TrueSI->getTrueValue()) + return 0; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } } if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) { if (FalseSI->getCondition() == CondVal) { + if (SI.getFalseValue() == FalseSI->getFalseValue()) + return 0; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -899,5 +903,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } + if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&SI, AllOnesEltMask, UndefElts)) { + if (V != &SI) + return ReplaceInstUsesWith(SI, V); + return &SI; + } + } + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 125c74a..54be8ed 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -989,6 +989,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } break; } + case Instruction::Select: { + APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); + if (ConstantVector* CV = dyn_cast<ConstantVector>(I->getOperand(0))) { + for (unsigned i = 0; i < VWidth; i++) { + if (CV->getAggregateElement(i)->isNullValue()) + LeftDemanded.clearBit(i); + else + RightDemanded.clearBit(i); + } + } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(1), LeftDemanded, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } + + TmpV = SimplifyDemandedVectorElts(I->getOperand(2), RightDemanded, + UndefElts2, Depth+1); + if (TmpV) { I->setOperand(2, TmpV); MadeChange = true; } + + // Output elements are undefined if both are undefined. + UndefElts &= UndefElts2; + break; + } case Instruction::BitCast: { // Vector->vector casts only. VectorType *VTy = dyn_cast<VectorType>(I->getOperand(0)->getType()); @@ -1074,6 +1097,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; break; + case Instruction::FPTrunc: + case Instruction::FPExt: + TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts, + UndefElts, Depth+1); + if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } + break; case Instruction::Call: { IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 3368026..06f4d2f 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -61,6 +61,8 @@ static const int kAsanCtorAndCtorPriority = 1; static const char *kAsanReportErrorTemplate = "__asan_report_"; static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; +static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; +static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; static const char *kAsanInitName = "__asan_init"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; @@ -86,8 +88,8 @@ static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes", static cl::opt<bool> ClInstrumentAtomics("asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); -static cl::opt<bool> ClMergeCallbacks("asan-merge-callbacks", - cl::desc("merge __asan_report_ callbacks to create fewer BBs"), +static cl::opt<bool> ClAlwaysSlowPath("asan-always-slow-path", + cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, cl::init(false)); // This flag limits the number of instructions to be instrumented // in any given BB. Normally, this should be set to unlimited (INT_MAX), @@ -106,6 +108,8 @@ static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", // This flag may need to be replaced with -f[no]asan-globals. static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); +static cl::opt<bool> ClInitializers("asan-initialization-order", + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); static cl::opt<bool> ClMemIntrin("asan-memintrin", cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -fasan-blacklist. @@ -145,24 +149,11 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"), namespace { -/// When the crash callbacks are merged, they receive some amount of arguments -/// that are merged in a PHI node. This struct represents arguments from one -/// call site. -struct CrashArg { - Value *Arg1; - Value *Arg2; -}; - /// An object of this type is created while instrumenting every function. struct AsanFunctionContext { - AsanFunctionContext(Function &Function) : F(Function), CrashBlock() { } + AsanFunctionContext(Function &Function) : F(Function) { } Function &F; - // These are initially zero. If we require at least one call to - // __asan_report_{read,write}{1,2,4,8,16}, an appropriate BB is created. - BasicBlock *CrashBlock[2][kNumberOfAccessSizes]; - typedef SmallVector<CrashArg, 8> CrashArgsVec; - CrashArgsVec CrashArgs[2][kNumberOfAccessSizes]; }; /// AddressSanitizer: instrument the code in module to find memory bugs. @@ -175,7 +166,7 @@ struct AddressSanitizer : public ModulePass { Value *Addr, uint32_t TypeSize, bool IsWrite); Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, Value *ShadowValue, uint32_t TypeSize); - Instruction *generateCrashCode(BasicBlock *BB, Value *Addr, Value *PC, + Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex); bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI); void instrumentMemIntrinsicParam(AsanFunctionContext &AFC, @@ -184,6 +175,8 @@ struct AddressSanitizer : public ModulePass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool handleFunction(Module &M, Function &F); + void createInitializerPoisonCalls(Module &M, + Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool poisonStackInFunction(Module &M, Function &F); virtual bool runOnModule(Module &M); @@ -191,7 +184,6 @@ struct AddressSanitizer : public ModulePass { static char ID; // Pass identification, replacement for typeid private: - uint64_t getAllocaSizeInBytes(AllocaInst *AI) { Type *Ty = AI->getAllocatedType(); uint64_t SizeInBytes = TD->getTypeAllocSize(Ty); @@ -207,9 +199,12 @@ struct AddressSanitizer : public ModulePass { } Function *checkInterfaceFunction(Constant *FuncOrBitcast); + bool ShouldInstrumentGlobal(GlobalVariable *G); void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB, Value *ShadowBase, bool DoPoison); bool LooksLikeCodeInBug11395(Instruction *I); + void FindDynamicInitializers(Module &M); + bool HasDynamicInitializer(GlobalVariable *G); LLVMContext *C; TargetData *TD; @@ -226,6 +221,7 @@ struct AddressSanitizer : public ModulePass { // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; InlineAsm *EmptyAsm; + SmallSet<GlobalValue*, 32> DynamicallyInitializedGlobals; }; } // namespace @@ -267,24 +263,24 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { // ThenBlock // Tail // -// If ThenBlock is zero, a new block is created and its terminator is returned. -// Otherwize 0 is returned. -static BranchInst *splitBlockAndInsertIfThen(Value *Cmp, - BasicBlock *ThenBlock = 0) { +// ThenBlock block is created and its terminator is returned. +// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise +// it is terminated with BranchInst to Tail. +static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) { Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode(); BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); TerminatorInst *HeadOldTerm = Head->getTerminator(); - BranchInst *CheckTerm = 0; - if (!ThenBlock) { - LLVMContext &C = Head->getParent()->getParent()->getContext(); - ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + LLVMContext &C = Head->getParent()->getParent()->getContext(); + BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); + TerminatorInst *CheckTerm; + if (Unreachable) + CheckTerm = new UnreachableInst(C, ThenBlock); + else CheckTerm = BranchInst::Create(Tail, ThenBlock); - } BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); - return CheckTerm; } @@ -336,7 +332,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC, Value *Cmp = IRB.CreateICmpNE(Length, Constant::getNullValue(Length->getType())); - InsertBefore = splitBlockAndInsertIfThen(Cmp); + InsertBefore = splitBlockAndInsertIfThen(Cmp, false); } instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true); @@ -371,14 +367,50 @@ static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) { return NULL; } +void AddressSanitizer::FindDynamicInitializers(Module& M) { + // Clang generates metadata identifying all dynamically initialized globals. + NamedMDNode *DynamicGlobals = + M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); + if (!DynamicGlobals) + return; + for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { + MDNode *MDN = DynamicGlobals->getOperand(i); + assert(MDN->getNumOperands() == 1); + Value *VG = MDN->getOperand(0); + // The optimizer may optimize away a global entirely, in which case we + // cannot instrument access to it. + if (!VG) + continue; + + GlobalVariable *G = cast<GlobalVariable>(VG); + DynamicallyInitializedGlobals.insert(G); + } +} +// Returns true if a global variable is initialized dynamically in this TU. +bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) { + return DynamicallyInitializedGlobals.count(G); +} + void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) { bool IsWrite; Value *Addr = isInterestingMemoryAccess(I, &IsWrite); assert(Addr); - if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) { - // We are accessing a global scalar variable. Nothing to catch here. - return; + if (ClOpt && ClOptGlobals) { + if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { + // If initialization order checking is disabled, a simple access to a + // dynamically initialized global is always valid. + if (!ClInitializers) + return; + // If a global variable does not have dynamic initialization we don't + // have to instrument it. However, if a global has external linkage, we + // assume it has dynamic initialization, as it may have an initializer + // in a different TU. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + !HasDynamicInitializer(G)) + return; + } } + Type *OrigPtrTy = Addr->getType(); Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType(); @@ -407,15 +439,11 @@ Function *AddressSanitizer::checkInterfaceFunction(Constant *FuncOrBitcast) { } Instruction *AddressSanitizer::generateCrashCode( - BasicBlock *BB, Value *Addr, Value *PC, + Instruction *InsertBefore, Value *Addr, bool IsWrite, size_t AccessSizeIndex) { - IRBuilder<> IRB(BB->getFirstNonPHI()); - CallInst *Call; - if (PC) - Call = IRB.CreateCall2(AsanErrorCallback[IsWrite][AccessSizeIndex], - Addr, PC); - else - Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], Addr); + IRBuilder<> IRB(InsertBefore); + CallInst *Call = IRB.CreateCall(AsanErrorCallback[IsWrite][AccessSizeIndex], + Addr); // We don't do Call->setDoesNotReturn() because the BB already has // UnreachableInst at the end. // This EmptyAsm is required to avoid callback merge. @@ -436,7 +464,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong, LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)); // (uint8_t) ((Addr & (Granularity-1)) + size - 1) LastAccessedByte = IRB.CreateIntCast( - LastAccessedByte, IRB.getInt8Ty(), false); + LastAccessedByte, ShadowValue->getType(), false); // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue); } @@ -456,112 +484,129 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy)); Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal); - - BasicBlock *CrashBlock = 0; - if (ClMergeCallbacks) { - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - BasicBlock **Cached = &AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!*Cached) { - std::string BBName("crash_bb-"); - BBName += (IsWrite ? "w-" : "r-") + itostr(1 << AccessSizeIndex); - BasicBlock *BB = BasicBlock::Create(*C, BBName, &AFC.F); - new UnreachableInst(*C, BB); - *Cached = BB; - } - CrashBlock = *Cached; - // We need to pass the PC as the second parameter to __asan_report_*. - // There are few problems: - // - Some architectures (e.g. x86_32) don't have a cheap way to get the PC. - // - LLVM doesn't have the appropriate intrinsic. - // For now, put a random number into the PC, just to allow experiments. - Value *PC = ConstantInt::get(IntptrTy, rand()); - CrashArg Arg = {AddrLong, PC}; - AFC.CrashArgs[IsWrite][AccessSizeIndex].push_back(Arg); - } else { - CrashBlock = BasicBlock::Create(*C, "crash_bb", &AFC.F); - new UnreachableInst(*C, CrashBlock); - size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); - Instruction *Crash = - generateCrashCode(CrashBlock, AddrLong, 0, IsWrite, AccessSizeIndex); - Crash->setDebugLoc(OrigIns->getDebugLoc()); - } - + size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize); size_t Granularity = 1 << MappingScale; - if (TypeSize < 8 * Granularity) { - BranchInst *CheckTerm = splitBlockAndInsertIfThen(Cmp); - assert(CheckTerm->isUnconditional()); + TerminatorInst *CrashTerm = 0; + + if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) { + TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false); + assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional()); BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB); + CrashTerm = new UnreachableInst(*C, CrashBlock); BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); ReplaceInstWithInst(CheckTerm, NewTerm); } else { - splitBlockAndInsertIfThen(Cmp, CrashBlock); + CrashTerm = splitBlockAndInsertIfThen(Cmp, true); + } + + Instruction *Crash = + generateCrashCode(CrashTerm, AddrLong, IsWrite, AccessSizeIndex); + Crash->setDebugLoc(OrigIns->getDebugLoc()); +} + +void AddressSanitizer::createInitializerPoisonCalls(Module &M, + Value *FirstAddr, + Value *LastAddr) { + // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. + Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); + // If that function is not present, this TU contains no globals, or they have + // all been optimized away + if (!GlobalInit) + return; + + // Set up the arguments to our poison/unpoison functions. + IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + + // Declare our poisoning and unpoisoning functions. + Function *AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); + Function *AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( + kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); + AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage); + + // Add a call to poison all external globals before the given function starts. + IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + + // Add calls to unpoison all globals before each return instruction. + for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); + I != E; ++I) { + if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + CallInst::Create(AsanUnpoisonGlobals, "", RI); + } } } +bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) { + Type *Ty = cast<PointerType>(G->getType())->getElementType(); + DEBUG(dbgs() << "GLOBAL: " << *G); + + if (!Ty->isSized()) return false; + if (!G->hasInitializer()) return false; + // Touch only those globals that will not be defined in other modules. + // Don't handle ODR type linkages since other modules may be built w/o asan. + if (G->getLinkage() != GlobalVariable::ExternalLinkage && + G->getLinkage() != GlobalVariable::PrivateLinkage && + G->getLinkage() != GlobalVariable::InternalLinkage) + return false; + // Two problems with thread-locals: + // - The address of the main thread's copy can't be computed at link-time. + // - Need to poison all copies, not just the main thread's one. + if (G->isThreadLocal()) + return false; + // For now, just ignore this Alloca if the alignment is large. + if (G->getAlignment() > RedzoneSize) return false; + + // Ignore all the globals with the names starting with "\01L_OBJC_". + // Many of those are put into the .cstring section. The linker compresses + // that section by removing the spare \0s after the string terminator, so + // our redzones get broken. + if ((G->getName().find("\01L_OBJC_") == 0) || + (G->getName().find("\01l_OBJC_") == 0)) { + DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); + return false; + } + + if (G->hasSection()) { + StringRef Section(G->getSection()); + // Ignore the globals from the __OBJC section. The ObjC runtime assumes + // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to + // them. + if ((Section.find("__OBJC,") == 0) || + (Section.find("__DATA, __objc_") == 0)) { + DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); + return false; + } + // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 + // Constant CFString instances are compiled in the following way: + // -- the string buffer is emitted into + // __TEXT,__cstring,cstring_literals + // -- the constant NSConstantString structure referencing that buffer + // is placed into __DATA,__cfstring + // Therefore there's no point in placing redzones into __DATA,__cfstring. + // Moreover, it causes the linker to crash on OS X 10.7 + if (Section.find("__DATA,__cfstring") == 0) { + DEBUG(dbgs() << "Ignoring CFString: " << *G); + return false; + } + } + + return true; +} + // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. bool AddressSanitizer::insertGlobalRedzones(Module &M) { SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.getGlobalList().begin(), - E = M.getGlobalList().end(); G != E; ++G) { - Type *Ty = cast<PointerType>(G->getType())->getElementType(); - DEBUG(dbgs() << "GLOBAL: " << *G); - - if (!Ty->isSized()) continue; - if (!G->hasInitializer()) continue; - // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. - if (G->getLinkage() != GlobalVariable::ExternalLinkage && - G->getLinkage() != GlobalVariable::PrivateLinkage && - G->getLinkage() != GlobalVariable::InternalLinkage) - continue; - // Two problems with thread-locals: - // - The address of the main thread's copy can't be computed at link-time. - // - Need to poison all copies, not just the main thread's one. - if (G->isThreadLocal()) - continue; - // For now, just ignore this Alloca if the alignment is large. - if (G->getAlignment() > RedzoneSize) continue; - - // Ignore all the globals with the names starting with "\01L_OBJC_". - // Many of those are put into the .cstring section. The linker compresses - // that section by removing the spare \0s after the string terminator, so - // our redzones get broken. - if ((G->getName().find("\01L_OBJC_") == 0) || - (G->getName().find("\01l_OBJC_") == 0)) { - DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G); - continue; - } - - if (G->hasSection()) { - StringRef Section(G->getSection()); - // Ignore the globals from the __OBJC section. The ObjC runtime assumes - // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to - // them. - if ((Section.find("__OBJC,") == 0) || - (Section.find("__DATA, __objc_") == 0)) { - DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G); - continue; - } - // See http://code.google.com/p/address-sanitizer/issues/detail?id=32 - // Constant CFString instances are compiled in the following way: - // -- the string buffer is emitted into - // __TEXT,__cstring,cstring_literals - // -- the constant NSConstantString structure referencing that buffer - // is placed into __DATA,__cfstring - // Therefore there's no point in placing redzones into __DATA,__cfstring. - // Moreover, it causes the linker to crash on OS X 10.7 - if (Section.find("__DATA,__cfstring") == 0) { - DEBUG(dbgs() << "Ignoring CFString: " << *G); - continue; - } - } - - GlobalsToChange.push_back(G); + for (Module::GlobalListType::iterator G = M.global_begin(), + E = M.global_end(); G != E; ++G) { + if (ShouldInstrumentGlobal(G)) + GlobalsToChange.push_back(G); } size_t n = GlobalsToChange.size(); @@ -572,13 +617,22 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); - SmallVector<Constant *, 16> Initializers(n); + IntptrTy, IntptrTy, + IntptrTy, NULL); + SmallVector<Constant *, 16> Initializers(n), DynamicInit; IRBuilder<> IRB(CtorInsertBefore); + if (ClInitializers) + FindDynamicInitializers(M); + + // The addresses of the first and last dynamically initialized globals in + // this TU. Used in initialization order checking. + Value *FirstDynamic = 0, *LastDynamic = 0; + for (size_t i = 0; i < n; i++) { GlobalVariable *G = GlobalsToChange[i]; PointerType *PtrTy = cast<PointerType>(G->getType()); @@ -587,6 +641,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { uint64_t RightRedzoneSize = RedzoneSize + (RedzoneSize - (SizeInBytes % RedzoneSize)); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); + // Determine whether this global should be poisoned in initialization. + bool GlobalHasDynamicInitializer = HasDynamicInitializer(G); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -621,7 +677,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); + + // Populate the first and last globals declared in this TU. + if (ClInitializers && GlobalHasDynamicInitializer) { + LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); + if (FirstDynamic == 0) + FirstDynamic = LastDynamic; + } + DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal); } @@ -630,8 +695,13 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) { M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage, ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); + // Create calls for poisoning before initializers run and unpoisoning after. + if (ClInitializers && FirstDynamic && LastDynamic) + createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanRegisterGlobalsName, IRB.getVoidTy(), + IntptrTy, IntptrTy, NULL)); AsanRegisterGlobals->setLinkage(Function::ExternalLinkage); IRB.CreateCall2(AsanRegisterGlobals, @@ -694,12 +764,7 @@ bool AddressSanitizer::runOnModule(Module &M) { std::string FunctionName = std::string(kAsanReportErrorTemplate) + (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex); // If we are merging crash callbacks, they have two parameters. - if (ClMergeCallbacks) - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( - M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, - IntptrTy, NULL)); - else - AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( + AsanErrorCallback[AccessIsWrite][AccessSizeIndex] = cast<Function>( M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL)); } } @@ -845,33 +910,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { NumInstrumented++; } - // Create PHI nodes and crash callbacks if we are merging crash callbacks. - if (NumInstrumented) { - for (size_t IsWrite = 0; IsWrite <= 1; IsWrite++) { - for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; - AccessSizeIndex++) { - BasicBlock *BB = AFC.CrashBlock[IsWrite][AccessSizeIndex]; - if (!BB) continue; - assert(ClMergeCallbacks); - AsanFunctionContext::CrashArgsVec &Args = - AFC.CrashArgs[IsWrite][AccessSizeIndex]; - IRBuilder<> IRB(BB->getFirstNonPHI()); - size_t n = Args.size(); - PHINode *PN1 = IRB.CreatePHI(IntptrTy, n); - PHINode *PN2 = IRB.CreatePHI(IntptrTy, n); - // We need to match crash parameters and the predecessors. - for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - PI != PE; ++PI) { - n--; - PN1->addIncoming(Args[n].Arg1, *PI); - PN2->addIncoming(Args[n].Arg2, *PI); - } - assert(n == 0); - generateCrashCode(BB, PN1, PN2, IsWrite, AccessSizeIndex); - } - } - } - DEBUG(dbgs() << F); bool ChangedStack = poisonStackInFunction(M, F); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index f76c77e..a4bb5a6 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -26,30 +26,6 @@ namespace llvm { /// The type parameter T determines the type of the nodes of the graph. template <typename T> class MaximumSpanningTree { - - // A comparing class for comparing weighted edges. - template <typename CT> - struct EdgeWeightCompare { - bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X, - typename MaximumSpanningTree<CT>::EdgeWeight Y) const { - if (X.second > Y.second) return true; - if (X.second < Y.second) return false; - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) { - if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) { - if (BBX->size() > BBY->size()) return true; - if (BBX->size() < BBY->size()) return false; - } - } - return false; - } - }; - public: typedef std::pair<const T*, const T*> Edge; typedef std::pair<Edge, double> EdgeWeight; @@ -59,6 +35,33 @@ namespace llvm { MaxSpanTree MST; + private: + // A comparing class for comparing weighted edges. + struct EdgeWeightCompare { + static bool getBlockSize(const T *X) { + const BasicBlock *BB = dyn_cast_or_null<BasicBlock>(X); + return BB ? BB->size() : 0; + } + + bool operator()(EdgeWeight X, EdgeWeight Y) const { + if (X.second > Y.second) return true; + if (X.second < Y.second) return false; + + // Equal edge weights: break ties by comparing block sizes. + size_t XSizeA = getBlockSize(X.first.first); + size_t YSizeA = getBlockSize(Y.first.first); + if (XSizeA > YSizeA) return true; + if (XSizeA < YSizeA) return false; + + size_t XSizeB = getBlockSize(X.first.second); + size_t YSizeB = getBlockSize(Y.first.second); + if (XSizeB > YSizeB) return true; + if (XSizeB < YSizeB) return false; + + return false; + } + }; + public: static char ID; // Class identification, replacement for typeinfo @@ -66,7 +69,7 @@ namespace llvm { /// spanning tree. MaximumSpanningTree(EdgeWeights &EdgeVector) { - std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>()); + std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare()); // Create spanning tree, Forest contains a special data structure // that makes checking if two nodes are already in a common (sub-)tree diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 277c4d5..a8deda8 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -66,11 +66,6 @@ static cl::opt<bool> DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare")); -// FIXME: Remove this abomination once all of the tests pass without it! -static cl::opt<bool> DisableDeleteDeadBlocks( - "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false), - cl::desc("Disable deleting dead blocks in CodeGenPrepare")); - static cl::opt<bool> DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); @@ -116,6 +111,7 @@ namespace { } private: + bool EliminateFallThrough(Function &F); bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; void EliminateMostlyEmptyBlock(BasicBlock *BB); @@ -187,10 +183,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { WorkList.insert(*II); } - if (!DisableDeleteDeadBlocks) - for (SmallPtrSet<BasicBlock*, 8>::iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) - DeleteDeadBlock(*I); + for (SmallPtrSet<BasicBlock*, 8>::iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) + DeleteDeadBlock(*I); + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= EliminateFallThrough(F); if (MadeChange) ModifiedDT = true; @@ -203,6 +203,39 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } +/// EliminateFallThrough - Merge basic blocks which are connected +/// by a single edge, where one of the basic blocks has a single successor +/// pointing to the other basic block, which has a single predecessor. +bool CodeGenPrepare::EliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + if (!SinglePred || SinglePred == BB) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, this); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB; + } + } + return Changed; +} + /// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, /// debug info directives, and an unconditional branch. Passes before isel /// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for @@ -610,7 +643,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // that have the default "don't know" as the objectsize. Anything else // should be left alone. CodeGenPrepareFortifiedLibCalls Simplifier; - return Simplifier.fold(CI, TD); + return Simplifier.fold(CI, TD, TLInfo); } /// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return @@ -645,10 +678,18 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (!TLI) return false; + PHINode *PN = 0; + BitCastInst *BCI = 0; Value *V = RI->getReturnValue(); - PHINode *PN = V ? dyn_cast<PHINode>(V) : NULL; - if (V && !PN) - return false; + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } BasicBlock *BB = RI->getParent(); if (PN && PN->getParent() != BB) @@ -666,6 +707,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { if (PN) { BasicBlock::iterator BI = BB->begin(); do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; if (&*BI != RI) return false; } else { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 5eff0e5..8b1283f 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -378,7 +378,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && + Later.Size >= Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) return OverwriteComplete; @@ -740,12 +740,19 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } - if (isa<AllocaInst>(BBI) || isAllocLikeFn(BBI)) { + if (isa<AllocaInst>(BBI)) { + // Remove allocas from the list of dead stack objects; there can't be + // any references before the definition. DeadStackObjects.remove(BBI); continue; } if (CallSite CS = cast<Value>(BBI)) { + // Remove allocation function calls from the list of dead stack objects; + // there can't be any references before the definition. + if (isAllocLikeFn(BBI)) + DeadStackObjects.remove(BBI); + // If this call does not access memory, it can't be loading any of our // pointers. if (AA->doesNotAccessMemory(CS)) @@ -771,7 +778,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // If all of the allocas were clobbered by the call then we're not going // to find anything else to process. if (DeadStackObjects.empty()) - return MadeChange; + break; continue; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 140864d..4822fd0 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -512,7 +512,7 @@ namespace { /// have that value number. Use findLeader to query it. struct LeaderTableEntry { Value *Val; - BasicBlock *BB; + const BasicBlock *BB; LeaderTableEntry *Next; }; DenseMap<uint32_t, LeaderTableEntry> LeaderTable; @@ -542,7 +542,7 @@ namespace { private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. - void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { + void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; @@ -608,13 +608,13 @@ namespace { void dump(DenseMap<uint32_t, Value*> &d); bool iterateOnFunction(Function &F); bool performPRE(Function &F); - Value *findLeader(BasicBlock *BB, uint32_t num); + Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); unsigned replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root); - bool propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root); + const BasicBlockEdge &Root); + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); }; char GVN::ID = 0; @@ -1977,7 +1977,7 @@ bool GVN::processLoad(LoadInst *L) { // and then scan the list to find one whose block dominates the block in // question. This is fast because dominator tree queries consist of only // a few comparisons of DFS numbers. -Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { +Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; if (!Vals.Val) return 0; @@ -2004,22 +2004,13 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// use is dominated by the given basic block. Returns the number of uses that /// were replaced. unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, - BasicBlock *Root) { + const BasicBlockEdge &Root) { unsigned Count = 0; for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ) { Use &U = (UI++).getUse(); - // If From occurs as a phi node operand then the use implicitly lives in the - // corresponding incoming block. Otherwise it is the block containing the - // user that must be dominated by Root. - BasicBlock *UsingBlock; - if (PHINode *PN = dyn_cast<PHINode>(U.getUser())) - UsingBlock = PN->getIncomingBlock(U); - else - UsingBlock = cast<Instruction>(U.getUser())->getParent(); - - if (DT->dominates(Root, UsingBlock)) { + if (DT->dominates(Root, U)) { U.set(To); ++Count; } @@ -2027,13 +2018,34 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To, return Count; } +/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return +/// true if every path from the entry block to 'Dst' passes via this edge. In +/// particular 'Dst' must not be reachable via another edge from 'Src'. +static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, + DominatorTree *DT) { + // While in theory it is interesting to consider the case in which Dst has + // more than one predecessor, because Dst might be part of a loop which is + // only reachable from Src, in practice it is pointless since at the time + // GVN runs all such loops have preheaders, which means that Dst will have + // been changed to have only one predecessor, namely Src. + const BasicBlock *Pred = E.getEnd()->getSinglePredecessor(); + const BasicBlock *Src = E.getStart(); + assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); + (void)Src; + return Pred != 0; +} + /// propagateEquality - The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { +bool GVN::propagateEquality(Value *LHS, Value *RHS, + const BasicBlockEdge &Root) { SmallVector<std::pair<Value*, Value*>, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; + // For speed, compute a conservative fast approximation to + // DT->dominates(Root, Root.getEnd()); + bool RootDominatesEnd = isOnlyReachableViaThisEdge(Root, DT); while (!Worklist.empty()) { std::pair<Value*, Value*> Item = Worklist.pop_back_val(); @@ -2065,9 +2077,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { LVN = RVN; } } - assert((!isa<Instruction>(RHS) || - DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) && - "Instruction doesn't dominate scope!"); // If value numbering later sees that an instruction in the scope is equal // to 'LHS' then ensure it will be turned into 'RHS'. In order to preserve @@ -2076,8 +2085,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // if RHS is an instruction (if an instruction in the scope is morphed into // LHS then it will be turned into RHS by the next GVN iteration anyway, so // using the leader table is about compiling faster, not optimizing better). - if (!isa<Instruction>(RHS)) - addToLeaderTable(LVN, RHS, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd && !isa<Instruction>(RHS)) + addToLeaderTable(LVN, RHS, Root.getEnd()); // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As // LHS always has at least one use that is not dominated by Root, this will @@ -2136,7 +2147,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { // If the number we were assigned was brand new then there is no point in // looking for an instruction realizing it: there cannot be one! if (Num < NextNum) { - Value *NotCmp = findLeader(Root, Num); + Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa<Instruction>(NotCmp)) { unsigned NumReplacements = replaceAllDominatedUsesWith(NotCmp, NotVal, Root); @@ -2146,7 +2157,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { } // Ensure that any instruction in scope that gets the "A < B" value number // is replaced with false. - addToLeaderTable(Num, NotVal, Root); + // The leader table only tracks basic blocks, not edges. Only add to if we + // have the simple case where the edge dominates the end. + if (RootDominatesEnd) + addToLeaderTable(Num, NotVal, Root.getEnd()); continue; } @@ -2155,22 +2169,6 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) { return Changed; } -/// isOnlyReachableViaThisEdge - There is an edge from 'Src' to 'Dst'. Return -/// true if every path from the entry block to 'Dst' passes via this edge. In -/// particular 'Dst' must not be reachable via another edge from 'Src'. -static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst, - DominatorTree *DT) { - // While in theory it is interesting to consider the case in which Dst has - // more than one predecessor, because Dst might be part of a loop which is - // only reachable from Src, in practice it is pointless since at the time - // GVN runs all such loops have preheaders, which means that Dst will have - // been changed to have only one predecessor, namely Src. - BasicBlock *Pred = Dst->getSinglePredecessor(); - assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); - (void)Src; - return Pred != 0; -} - /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I) { @@ -2210,18 +2208,20 @@ bool GVN::processInstruction(Instruction *I) { BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // Avoid multiple edges early. + if (TrueSucc == FalseSucc) + return false; + BasicBlock *Parent = BI->getParent(); bool Changed = false; - if (isOnlyReachableViaThisEdge(Parent, TrueSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getTrue(TrueSucc->getContext()), - TrueSucc); + Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); + BasicBlockEdge TrueE(Parent, TrueSucc); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE); - if (isOnlyReachableViaThisEdge(Parent, FalseSucc, DT)) - Changed |= propagateEquality(BranchCond, - ConstantInt::getFalse(FalseSucc->getContext()), - FalseSucc); + Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); + BasicBlockEdge FalseE(Parent, FalseSucc); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE); return Changed; } @@ -2234,8 +2234,9 @@ bool GVN::processInstruction(Instruction *I) { for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) { BasicBlock *Dst = i.getCaseSuccessor(); - if (isOnlyReachableViaThisEdge(Parent, Dst, DT)) - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst); + BasicBlockEdge E(Parent, Dst); + if (E.isSingleEdge()) + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); } return Changed; } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 582948e..0192e92 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -175,7 +175,9 @@ namespace { bool canSinkOrHoistInst(Instruction &I); bool isNotUsedInLoop(Instruction &I); - void PromoteAliasSet(AliasSet &AS); + void PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts); }; } @@ -256,10 +258,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. if (!DisablePromotion && Preheader && L->hasDedicatedExits()) { + SmallVector<BasicBlock *, 8> ExitBlocks; + SmallVector<Instruction *, 8> InsertPts; + // Loop over all of the alias sets in the tracker object. for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end(); I != E; ++I) - PromoteAliasSet(*I); + PromoteAliasSet(*I, ExitBlocks, InsertPts); } // Clear out loops state information for the next iteration @@ -631,6 +636,7 @@ namespace { Value *SomePtr; // Designated pointer to store to. SmallPtrSet<Value*, 4> &PointerMustAliases; SmallVectorImpl<BasicBlock*> &LoopExitBlocks; + SmallVectorImpl<Instruction*> &LoopInsertPts; AliasSetTracker &AST; DebugLoc DL; int Alignment; @@ -638,11 +644,12 @@ namespace { LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, - SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast, - DebugLoc dl, int alignment) + SmallVectorImpl<BasicBlock*> &LEB, + SmallVectorImpl<Instruction*> &LIP, + AliasSetTracker &ast, DebugLoc dl, int alignment) : LoadAndStorePromoter(Insts, S), SomePtr(SP), - PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl), - Alignment(alignment) {} + PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), + AST(ast), DL(dl), Alignment(alignment) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -662,7 +669,7 @@ namespace { for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); - Instruction *InsertPos = ExitBlock->getFirstInsertionPt(); + Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); @@ -684,7 +691,9 @@ namespace { /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// -void LICM::PromoteAliasSet(AliasSet &AS) { +void LICM::PromoteAliasSet(AliasSet &AS, + SmallVectorImpl<BasicBlock*> &ExitBlocks, + SmallVectorImpl<Instruction*> &InsertPts) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. @@ -794,14 +803,20 @@ void LICM::PromoteAliasSet(AliasSet &AS) { // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); - SmallVector<BasicBlock*, 8> ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); + // Figure out the loop exits and their insertion points, if this is the + // first promotion. + if (ExitBlocks.empty()) { + CurLoop->getUniqueExitBlocks(ExitBlocks); + InsertPts.resize(ExitBlocks.size()); + for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) + InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + } // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index b14a713..0ae7a51 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -738,7 +738,8 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { bool Changed = false; while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()); + Value *V = DeadInsts.pop_back_val(); + Instruction *I = dyn_cast_or_null<Instruction>(V); if (I == 0 || !isInstructionTriviallyDead(I)) continue; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index ffcf97c..09687d8 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -543,6 +543,7 @@ static bool LinearizeExprTree(BinaryOperator *I, // Update the number of paths to the leaf. IncorporateWeight(It->second, Weight, Opcode); +#if 0 // TODO: Re-enable once PR13021 is fixed. // The leaf already has one use from inside the expression. As we want // exactly one such use, drop this new use of the leaf. assert(!Op->hasOneUse() && "Only one use, but we got here twice!"); @@ -559,6 +560,7 @@ static bool LinearizeExprTree(BinaryOperator *I, Leaves.erase(It); continue; } +#endif // If we still have uses that are not accounted for by the expression // then it is not safe to modify the value. diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index ec835b1..8090fdf 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -56,7 +56,6 @@ STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); -STATISTIC(NumGlobals, "Number of allocas copied from constant global"); namespace { struct SROA : public FunctionPass { @@ -183,9 +182,6 @@ namespace { void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SmallVector<AllocaInst*, 32> &NewElts); bool ShouldAttemptScalarRepl(AllocaInst *AI); - - static MemTransferInst *isOnlyCopiedFromConstantGlobal( - AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete); }; // SROA_DT - SROA that uses DominatorTree. @@ -612,11 +608,16 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { // Compute the offset that this GEP adds to the pointer. SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); - if (!GEP->hasAllConstantIndices()) - NonConstantIdx = Indices.pop_back_val(); + Value* GEPNonConstantIdx = 0; + if (!GEP->hasAllConstantIndices()) { + assert(!NonConstantIdx && + "Dynamic GEP reading from dynamic GEP unsupported"); + GEPNonConstantIdx = Indices.pop_back_val(); + } else + GEPNonConstantIdx = NonConstantIdx; uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(), Indices); - ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx); + ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, GEPNonConstantIdx); GEP->eraseFromParent(); continue; } @@ -1460,26 +1461,6 @@ bool SROA::ShouldAttemptScalarRepl(AllocaInst *AI) { return false; } -/// getPointeeAlignment - Compute the minimum alignment of the value pointed -/// to by the given pointer. -static unsigned getPointeeAlignment(Value *V, const TargetData &TD) { - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - (CE->getOpcode() == Instruction::GetElementPtr && - cast<GEPOperator>(CE)->hasAllZeroIndices())) - return getPointeeAlignment(CE->getOperand(0), TD); - - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - if (!GV->isDeclaration()) - return TD.getPreferredAlignment(GV); - - if (PointerType *PT = dyn_cast<PointerType>(V->getType())) - return TD.getABITypeAlignment(PT->getElementType()); - - return 0; -} - - // performScalarRepl - This algorithm is a simple worklist driven algorithm, // which runs on all of the alloca instructions in the function, removing them // if they are only used by getelementptr instructions. @@ -1511,29 +1492,6 @@ bool SROA::performScalarRepl(Function &F) { if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) continue; - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. - SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { - if (AI->getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - ToDelete[i]->eraseFromParent(); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); - Copy->eraseFromParent(); // Don't mutate the global. - AI->eraseFromParent(); - ++NumGlobals; - Changed = true; - continue; - } - } - // Check to see if we can perform the core SROA transformation. We cannot // transform the allocation instruction if it is an array allocation // (allocations OF arrays are ok though), and an allocation of a scalar @@ -2651,134 +2609,3 @@ bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { return true; } - - - -/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to -/// some part of a constant global variable. This intentionally only accepts -/// constant expressions because we don't can't rewrite arbitrary instructions. -static bool PointsToConstantGlobal(Value *V) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) - return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) - if (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr) - return PointsToConstantGlobal(CE->getOperand(0)); - return false; -} - -/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) -/// pointer to an alloca. Ignore any reads of the pointer, return false if we -/// see any stores or other unknown uses. If we see pointer arithmetic, keep -/// track of whether it moves the pointer (with isOffset) but otherwise traverse -/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to -/// the alloca, and if the source pointer is a pointer to a constant global, we -/// can optimize this. -static bool -isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - bool isOffset, - SmallVector<Instruction *, 4> &LifetimeMarkers) { - // We track lifetime intrinsics as we encounter them. If we decide to go - // ahead and replace the value with the global, this lets the caller quickly - // eliminate the markers. - - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { - User *U = cast<Instruction>(*UI); - - if (LoadInst *LI = dyn_cast<LoadInst>(U)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } - - if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset, - LifetimeMarkers)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, - isOffset || !GEP->hasAllZeroIndices(), - LifetimeMarkers)) - return false; - continue; - } - - if (CallSite CS = U) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(UI)) - continue; - - // If this is a readonly/readnone call site, then we know it is just a - // load (but one that potentially returns the value itself), so we can - // ignore it if we know that the value isn't captured. - unsigned ArgNo = CS.getArgumentNo(UI); - if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) - continue; - - // If this is being passed as a byval argument, the caller is making a - // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) - continue; - } - - // Lifetime intrinsics can be handled by the caller. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { - assert(II->use_empty() && "Lifetime markers have no result to use!"); - LifetimeMarkers.push_back(II); - continue; - } - } - - // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. - MemTransferInst *MI = dyn_cast<MemTransferInst>(U); - if (MI == 0) - return false; - - // If the transfer is using the alloca as a source of the transfer, then - // ignore it since it is a load (unless the transfer is volatile). - if (UI.getOperandNo() == 1) { - if (MI->isVolatile()) return false; - continue; - } - - // If we already have seen a copy, reject the second one. - if (TheCopy) return false; - - // If the pointer has been offset from the start of the alloca, we can't - // safely handle this. - if (isOffset) return false; - - // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 0) return false; - - // If the source of the memcpy/move is not a constant global, reject it. - if (!PointsToConstantGlobal(MI->getSource())) - return false; - - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; - } - return true; -} - -/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only -/// modified by a copy from a constant global. If we can prove this, we can -/// replace any uses of the alloca with uses of the global directly. -MemTransferInst * -SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI, - SmallVector<Instruction*, 4> &ToDelete) { - MemTransferInst *TheCopy = 0; - if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete)) - return TheCopy; - return 0; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index a1a8a41..3904419 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -157,14 +157,15 @@ struct StrCatOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitStrLenMemCpy(Src, Dst, Len, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, Len, B); } - void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { + Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B, TD); + Value *DstLen = EmitStrLen(Dst, B, TD, TLI); + if (!DstLen) + return 0; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -175,6 +176,7 @@ struct StrCatOpt : public LibCallOptimization { // concatenation for us. Make a memcpy to copy the nul byte with align = 1. B.CreateMemCpy(CpyDst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1); + return Dst; } }; @@ -221,8 +223,7 @@ struct StrNCatOpt : public StrCatOpt { // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further - EmitStrLenMemCpy(Src, Dst, SrcLen, B); - return Dst; + return EmitStrLenMemCpy(Src, Dst, SrcLen, B); } }; @@ -254,7 +255,7 @@ struct StrChrOpt : public LibCallOptimization { return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(TD->getIntPtrType(*Context), Len), - B, TD); + B, TD, TLI); } // Otherwise, the character is a constant, see if the first argument is @@ -299,7 +300,7 @@ struct StrRChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (TD && CharC->isZero()) - return EmitStrChr(SrcStr, '\0', B, TD); + return EmitStrChr(SrcStr, '\0', B, TD, TLI); return 0; } @@ -355,7 +356,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B, TD); + std::min(Len1, Len2)), B, TD, TLI); } return 0; @@ -391,7 +392,7 @@ struct StrNCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), 0); if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) - return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI); StringRef Str1, Str2; bool HasStr1 = getConstantStringInfo(Str1P, Str1); @@ -447,11 +448,10 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - if (OptChkCall) - EmitMemCpyChk(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || + !EmitMemCpyChk(Dst, Src, + ConstantInt::get(TD->getIntPtrType(*Context), Len), + CI->getArgOperand(2), B, TD, TLI)) B.CreateMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), 1); return Dst; @@ -480,8 +480,10 @@ struct StpCpyOpt: public LibCallOptimization { if (!TD) return 0; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); - if (Dst == Src) // stpcpy(x,x) -> x+strlen(x) - return B.CreateInBoundsGEP(Dst, EmitStrLen(Src, B, TD)); + if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) + Value *StrLen = EmitStrLen(Src, B, TD, TLI); + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + } // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -494,9 +496,8 @@ struct StpCpyOpt: public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - if (OptChkCall) - EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, TD); - else + if (!OptChkCall || !EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, + TD, TLI)) B.CreateMemCpy(Dst, Src, LenV, 1); return DstEnd; } @@ -609,7 +610,7 @@ struct StrPBrkOpt : public LibCallOptimization { // strpbrk(s, "a") -> strchr(s, 'a') if (TD && HasS2 && S2.size() == 1) - return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD); + return EmitStrChr(CI->getArgOperand(0), S2[0], B, TD, TLI); return 0; } @@ -698,7 +699,7 @@ struct StrCSpnOpt : public LibCallOptimization { // strcspn(s, "") -> strlen(s) if (TD && HasS2 && S2.empty()) - return EmitStrLen(CI->getArgOperand(0), B, TD); + return EmitStrLen(CI->getArgOperand(0), B, TD, TLI); return 0; } @@ -722,9 +723,13 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { - Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD, TLI); + if (!StrLen) + return 0; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), - StrLen, B, TD); + StrLen, B, TD, TLI); + if (!StrNCmp) + return 0; for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); UI != UE; ) { ICmpInst *Old = cast<ICmpInst>(*UI++); @@ -760,9 +765,10 @@ struct StrStrOpt : public LibCallOptimization { } // fold strstr(x, "y") -> strchr(x, 'y'). - if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), - ToFindStr[0], B, TD), CI->getType()); + if (HasStr2 && ToFindStr.size() == 1) { + Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD, TLI); + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + } return 0; } }; @@ -1179,8 +1185,8 @@ struct PrintFOpt : public LibCallOptimization { // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { - Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32(FormatStr[0]), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1191,26 +1197,26 @@ struct PrintFOpt : public LibCallOptimization { // pass to be run after this pass, to merge duplicate strings. FormatStr = FormatStr.drop_back(); Value *GV = B.CreateGlobalString(FormatStr, "str"); - EmitPutS(GV, B, TD); - return CI->use_empty() ? (Value*)CI : - ConstantInt::get(CI->getType(), FormatStr.size()+1); + Value *NewCI = EmitPutS(GV, B, TD, TLI); + return (CI->use_empty() || !NewCI) ? + NewCI : + ConstantInt::get(CI->getType(), FormatStr.size()+1); } // Optimize specific format strings. // printf("%c", chr) --> putchar(chr) if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD, TLI); - if (CI->use_empty()) return CI; + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && CI->getArgOperand(1)->getType()->isPointerTy()) { - EmitPutS(CI->getArgOperand(1), B, TD); - return CI; + return EmitPutS(CI->getArgOperand(1), B, TD, TLI); } return 0; } @@ -1297,7 +1303,9 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI); + if (!Len) + return 0; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1364,8 +1372,8 @@ struct FWriteOpt : public LibCallOptimization { // This optimisation is only valid, if the return value is unused. if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); - EmitFPutC(Char, CI->getArgOperand(3), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } return 0; @@ -1390,10 +1398,10 @@ struct FPutsOpt : public LibCallOptimization { // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getArgOperand(0), - ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getArgOperand(1), B, TD, TLI); - return CI; // Known to have no uses (see above). + // Known to have no uses (see above). + return EmitFWrite(CI->getArgOperand(0), + ConstantInt::get(TD->getIntPtrType(*Context), Len-1), + CI->getArgOperand(1), B, TD, TLI); } }; @@ -1417,11 +1425,11 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getArgOperand(1), - ConstantInt::get(TD->getIntPtrType(*Context), - FormatStr.size()), - CI->getArgOperand(0), B, TD, TLI); - return ConstantInt::get(CI->getType(), FormatStr.size()); + Value *NewCI = EmitFWrite(CI->getArgOperand(1), + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()), + CI->getArgOperand(0), B, TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0; } // The remaining optimizations require the format string to be "%s" or "%c" @@ -1434,16 +1442,16 @@ struct FPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); - return ConstantInt::get(CI->getType(), 1); + Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, + TD, TLI); + return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); - return CI; + return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI); } return 0; } @@ -1494,8 +1502,8 @@ struct PutsOpt : public LibCallOptimization { if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') - Value *Res = EmitPutChar(B.getInt32('\n'), B, TD); - if (CI->use_empty()) return CI; + Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI); + if (CI->use_empty() || !Res) return Res; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1633,6 +1641,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; + if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) + Optimizations["fabs"] = &UnaryDoubleFP; if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) Optimizations["floor"] = &UnaryDoubleFP; if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) @@ -1643,6 +1653,8 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["rint"] = &UnaryDoubleFP; if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) Optimizations["nearbyint"] = &UnaryDoubleFP; + if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) + Optimizations["trunc"] = &UnaryDoubleFP; // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 5576432..2679b93 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -659,10 +659,26 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // If the return instruction returns a value, and if the value was a // PHI node in "BB", propagate the right value into the return. for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); - i != e; ++i) - if (PHINode *PN = dyn_cast<PHINode>(*i)) - if (PN->getParent() == BB) - *i = PN->getIncomingValueForBlock(Pred); + i != e; ++i) { + Value *V = *i; + Instruction *NewBC = 0; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + // Return value might be bitcasted. Clone and insert it before the + // return instruction. + V = BCI->getOperand(0); + NewBC = BCI->clone(); + Pred->getInstList().insert(NewRet, NewBC); + *i = NewBC; + } + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (PN->getParent() == BB) { + if (NewBC) + NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); + else + *i = PN->getIncomingValueForBlock(Pred); + } + } + } // Update any PHI nodes in the returning block to realize that we no // longer branch to them. diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 27f7724..e13fd71 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -34,7 +34,11 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { /// EmitStrLen - Emit a call to the strlen function to the builder, for the /// specified pointer. This always returns an integer value of size intptr_t. -Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strlen)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -53,11 +57,41 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { return CI; } +/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the +/// specified pointer. Ptr is required to be some pointer type, MaxLen must +/// be of size_t type, and the return value has 'intptr_t' type. +Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strnlen)) + return 0; + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall2(StrNLen, CastToCStr(Ptr, B), MaxLen, "strnlen"); + if (const Function *F = dyn_cast<Function>(StrNLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, - const TargetData *TD) { + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -75,7 +109,11 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, /// EmitStrNCmp - Emit a call to the strncmp function to the builder. Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::strncmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -101,7 +139,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, - const TargetData *TD, StringRef Name) { + const TargetData *TD, const TargetLibraryInfo *TLI, + StringRef Name) { + if (!TLI->has(LibFunc::strcpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -119,7 +161,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, - IRBuilder<> &B, const TargetData *TD, StringRef Name) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI, StringRef Name) { + if (!TLI->has(LibFunc::strncpy)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -139,7 +185,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src /// are pointers. Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, - IRBuilder<> &B, const TargetData *TD) { + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcpy_chk)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind); @@ -162,7 +212,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *llvm::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memchr)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI; AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); @@ -183,7 +237,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, /// EmitMemCmp - Emit a call to the memcmp function. Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B, const TargetData *TD) { + Value *Len, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::memcmp)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -236,7 +294,11 @@ Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B, /// EmitPutChar - Emit a call to the putchar function. This assumes that Char /// is an integer. -Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::putchar)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), B.getInt32Ty(), NULL); @@ -254,7 +316,11 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { /// EmitPutS - Emit a call to the puts function. This assumes that Str is /// some pointer. -void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { +Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::puts)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -267,13 +333,16 @@ void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) CI->setCallingConv(F->getCallingConv()); - + return CI; } /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is /// an integer and File is a pointer to FILE. -void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, - const TargetData *TD) { +Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputc)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[2]; AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); @@ -295,12 +364,16 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFPutS - Emit a call to the puts function. Str is required to be a /// pointer and File is a pointer to FILE. -void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, - const TargetData *TD, const TargetLibraryInfo *TLI) { +Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD, const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fputs)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -321,13 +394,17 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B, const TargetData *TD, - const TargetLibraryInfo *TLI) { +Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD, + const TargetLibraryInfo *TLI) { + if (!TLI->has(LibFunc::fwrite)) + return 0; + Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeWithIndex AWI[3]; AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); @@ -354,11 +431,13 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) CI->setCallingConv(Fn->getCallingConv()); + return CI; } SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { } -bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { +bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI) { // We really need TargetData for later. if (!TD) return false; @@ -446,7 +525,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // string lengths for varying. if (isFoldable(2, 1, true)) { Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, - Name.substr(2, 6)); + TLI, Name.substr(2, 6)); + if (!Ret) + return false; replaceCall(Ret); return true; } @@ -464,7 +545,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), B, TD, Name.substr(2, 7)); + CI->getArgOperand(2), B, TD, TLI, + Name.substr(2, 7)); + if (!Ret) + return false; replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index b3f5289..72d4199 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -39,7 +39,7 @@ SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { - delete &getAvailableVals(AV); + delete static_cast<AvailableValsTy*>(AV); } /// Initialize - Reset this object to get ready for a new set of SSA @@ -214,6 +214,11 @@ void SSAUpdater::RewriteUse(Use &U) { else V = GetValueInMiddleOfBlock(User->getParent()); + // Notify that users of the existing value that it is being replaced. + Value *OldVal = U.get(); + if (OldVal != V && OldVal->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(OldVal, V); + U.set(V); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index aedb86b..c09c69b 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -26,6 +26,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Module.h" +#include "llvm/TypeFinder.h" #include "llvm/ValueSymbolTable.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" @@ -145,7 +146,7 @@ class TypePrinting { public: /// NamedTypes - The named types that are used by the current module. - std::vector<StructType*> NamedTypes; + TypeFinder NamedTypes; /// NumberedTypes - The numbered types, along with their value. DenseMap<StructType*, unsigned> NumberedTypes; @@ -164,7 +165,7 @@ public: void TypePrinting::incorporateTypes(const Module &M) { - M.findUsedStructTypes(NamedTypes); + NamedTypes.run(M, false); // The list of struct types we got back includes all the struct types, split // the unnamed ones out to a numbering and remove the anonymous structs. @@ -1352,12 +1353,12 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::LinkerPrivateWeakLinkage: Out << "linker_private_weak "; break; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - Out << "linker_private_weak_def_auto "; - break; case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "linkonce_odr_auto_hide "; + break; case GlobalValue::WeakAnyLinkage: Out << "weak "; break; case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; case GlobalValue::CommonLinkage: Out << "common "; break; diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index d466ac6..c8219eb 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -88,6 +88,9 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); Result += " "; } + if (Attrs & Attribute::IANSDialect) + Result += "ia_nsdialect "; + // Trim the trailing space. assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 648ccbd..6a20be6 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -31,6 +31,7 @@ add_llvm_library(LLVMCore PassRegistry.cpp PrintModulePass.cpp Type.cpp + TypeFinder.cpp Use.cpp User.cpp Value.cpp diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 972db3c..a56f1b2 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1084,6 +1084,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkOnceAnyLinkage; case GlobalValue::LinkOnceODRLinkage: return LLVMLinkOnceODRLinkage; + case GlobalValue::LinkOnceODRAutoHideLinkage: + return LLVMLinkOnceODRAutoHideLinkage; case GlobalValue::WeakAnyLinkage: return LLVMWeakAnyLinkage; case GlobalValue::WeakODRLinkage: @@ -1098,8 +1100,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkerPrivateLinkage; case GlobalValue::LinkerPrivateWeakLinkage: return LLVMLinkerPrivateWeakLinkage; - case GlobalValue::LinkerPrivateWeakDefAutoLinkage: - return LLVMLinkerPrivateWeakDefAutoLinkage; case GlobalValue::DLLImportLinkage: return LLVMDLLImportLinkage; case GlobalValue::DLLExportLinkage: @@ -1129,6 +1129,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { case LLVMLinkOnceODRLinkage: GV->setLinkage(GlobalValue::LinkOnceODRLinkage); break; + case LLVMLinkOnceODRAutoHideLinkage: + GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage); + break; case LLVMWeakAnyLinkage: GV->setLinkage(GlobalValue::WeakAnyLinkage); break; @@ -1150,9 +1153,6 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { case LLVMLinkerPrivateWeakLinkage: GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage); break; - case LLVMLinkerPrivateWeakDefAutoLinkage: - GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage); - break; case LLVMDLLImportLinkage: GV->setLinkage(GlobalValue::DLLImportLinkage); break; diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index 219e631..77b2403 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -39,6 +39,19 @@ static cl::opt<bool,true> VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), cl::desc("Verify dominator info (time consuming)")); +bool BasicBlockEdge::isSingleEdge() const { + const TerminatorInst *TI = Start->getTerminator(); + unsigned NumEdgesToEnd = 0; + for (unsigned int i = 0, n = TI->getNumSuccessors(); i < n; ++i) { + if (TI->getSuccessor(i) == End) + ++NumEdgesToEnd; + if (NumEdgesToEnd >= 2) + return false; + } + assert(NumEdgesToEnd == 1); + return true; +} + //===----------------------------------------------------------------------===// // DominatorTree Implementation //===----------------------------------------------------------------------===// @@ -142,12 +155,27 @@ bool DominatorTree::dominates(const Instruction *Def, // Invoke results are only usable in the normal destination, not in the // exceptional destination. BasicBlock *NormalDest = II->getNormalDest(); - if (!dominates(NormalDest, UseBB)) + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, UseBB); +} + +bool DominatorTree::dominates(const BasicBlockEdge &BBE, + const BasicBlock *UseBB) const { + // Assert that we have a single edge. We could handle them by simply + // returning false, but since isSingleEdge is linear on the number of + // edges, the callers can normally handle them more efficiently. + assert(BBE.isSingleEdge()); + + // If the BB the edge ends in doesn't dominate the use BB, then the + // edge also doesn't. + const BasicBlock *Start = BBE.getStart(); + const BasicBlock *End = BBE.getEnd(); + if (!dominates(End, UseBB)) return false; - // Simple case: if the normal destination has a single predecessor, the - // fact that it dominates the use block implies that we also do. - if (NormalDest->getSinglePredecessor()) + // Simple case: if the end BB has a single predecessor, the fact that it + // dominates the use block implies that the edge also does. + if (End->getSinglePredecessor()) return true; // The normal edge from the invoke is critical. Conceptually, what we would @@ -170,29 +198,45 @@ bool DominatorTree::dominates(const Instruction *Def, // trivially dominates itself, so we only have to find if it dominates the // other predecessors. Since the only way out of X is via NormalDest, X can // only properly dominate a node if NormalDest dominates that node too. - for (pred_iterator PI = pred_begin(NormalDest), - E = pred_end(NormalDest); PI != E; ++PI) { + for (const_pred_iterator PI = pred_begin(End), E = pred_end(End); + PI != E; ++PI) { const BasicBlock *BB = *PI; - if (BB == DefBB) + if (BB == Start) continue; - if (!DT->isReachableFromEntry(BB)) - continue; - - if (!dominates(NormalDest, BB)) + if (!dominates(End, BB)) return false; } return true; } -bool DominatorTree::dominates(const Instruction *Def, +bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { - Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); + // Assert that we have a single edge. We could handle them by simply + // returning false, but since isSingleEdge is linear on the number of + // edges, the callers can normally handle them more efficiently. + assert(BBE.isSingleEdge()); + + Instruction *UserInst = cast<Instruction>(U.getUser()); + // A PHI in the end of the edge is dominated by it. + PHINode *PN = dyn_cast<PHINode>(UserInst); + if (PN && PN->getParent() == BBE.getEnd() && + PN->getIncomingBlock(U) == BBE.getStart()) + return true; - // Instructions do not dominate non-instructions. - if (!UserInst) - return false; + // Otherwise use the edge-dominates-block query, which + // handles the crazy critical edge cases properly. + const BasicBlock *UseBB; + if (PN) + UseBB = PN->getIncomingBlock(U); + else + UseBB = UserInst->getParent(); + return dominates(BBE, UseBB); +} +bool DominatorTree::dominates(const Instruction *Def, + const Use &U) const { + Instruction *UserInst = cast<Instruction>(U.getUser()); const BasicBlock *DefBB = Def->getParent(); // Determine the block in which the use happens. PHI nodes use @@ -218,17 +262,9 @@ bool DominatorTree::dominates(const Instruction *Def, // their own block, except possibly a phi, so we don't need to // walk the block in any case. if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) { - // A PHI in the normal successor using the invoke's return value is - // dominated by the invoke's return value. - if (isa<PHINode>(UserInst) && - UserInst->getParent() == II->getNormalDest() && - cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB) - return true; - - // Otherwise use the instruction-dominates-block query, which - // handles the crazy case of an invoke with a critical edge - // properly. - return dominates(Def, UseBB); + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, U); } // If the def and use are in different blocks, do a simple CFG dominator diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index ede4626..95e5a8b 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -200,7 +200,7 @@ const Function *MDNode::getFunction() const { // destroy - Delete this node. Only when there are no uses. void MDNode::destroy() { setValueSubclassData(getSubclassDataFromValue() | DestroyFlag); - // Placement delete, the free the memory. + // Placement delete, then free the memory. this->~MDNode(); free(this); } diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 8ea3665..5b5176b 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -467,143 +467,3 @@ void Module::removeLibrary(StringRef Lib) { return; } } - -//===----------------------------------------------------------------------===// -// Type finding functionality. -//===----------------------------------------------------------------------===// - -namespace { - /// TypeFinder - Walk over a module, identifying all of the types that are - /// used by the module. - class TypeFinder { - // To avoid walking constant expressions multiple times and other IR - // objects, we keep several helper maps. - DenseSet<const Value*> VisitedConstants; - DenseSet<Type*> VisitedTypes; - - std::vector<StructType*> &StructTypes; - bool OnlyNamed; - public: - TypeFinder(std::vector<StructType*> &structTypes, bool onlyNamed) - : StructTypes(structTypes), OnlyNamed(onlyNamed) {} - - void run(const Module &M) { - // Get types from global variables. - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - incorporateType(I->getType()); - if (I->hasInitializer()) - incorporateValue(I->getInitializer()); - } - - // Get types from aliases. - for (Module::const_alias_iterator I = M.alias_begin(), - E = M.alias_end(); I != E; ++I) { - incorporateType(I->getType()); - if (const Value *Aliasee = I->getAliasee()) - incorporateValue(Aliasee); - } - - // Get types from functions. - SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; - for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { - incorporateType(FI->getType()); - - // First incorporate the arguments. - for (Function::const_arg_iterator AI = FI->arg_begin(), - AE = FI->arg_end(); AI != AE; ++AI) - incorporateValue(AI); - - for (Function::const_iterator BB = FI->begin(), E = FI->end(); - BB != E;++BB) - for (BasicBlock::const_iterator II = BB->begin(), - E = BB->end(); II != E; ++II) { - const Instruction &I = *II; - // Incorporate the type of the instruction. - incorporateType(I.getType()); - - // Incorporate non-instruction operand types. (We are incorporating - // all instructions with this loop.) - for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); - OI != OE; ++OI) - if (!isa<Instruction>(OI)) - incorporateValue(*OI); - - // Incorporate types hiding in metadata. - I.getAllMetadataOtherThanDebugLoc(MDForInst); - for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) - incorporateMDNode(MDForInst[i].second); - MDForInst.clear(); - } - } - - for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); I != E; ++I) { - const NamedMDNode *NMD = I; - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - incorporateMDNode(NMD->getOperand(i)); - } - } - - private: - void incorporateType(Type *Ty) { - // Check to see if we're already visited this type. - if (!VisitedTypes.insert(Ty).second) - return; - - // If this is a structure or opaque type, add a name for the type. - if (StructType *STy = dyn_cast<StructType>(Ty)) - if (!OnlyNamed || STy->hasName()) - StructTypes.push_back(STy); - - // Recursively walk all contained types. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - incorporateType(*I); - } - - /// incorporateValue - This method is used to walk operand lists finding - /// types hiding in constant expressions and other operands that won't be - /// walked in other ways. GlobalValues, basic blocks, instructions, and - /// inst operands are all explicitly enumerated. - void incorporateValue(const Value *V) { - if (const MDNode *M = dyn_cast<MDNode>(V)) - return incorporateMDNode(M); - if (!isa<Constant>(V) || isa<GlobalValue>(V)) return; - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Check this type. - incorporateType(V->getType()); - - // If this is an instruction, we incorporate it separately. - if (isa<Instruction>(V)) - return; - - // Look in operands for types. - const User *U = cast<User>(V); - for (Constant::const_op_iterator I = U->op_begin(), - E = U->op_end(); I != E;++I) - incorporateValue(*I); - } - - void incorporateMDNode(const MDNode *V) { - - // Already visited? - if (!VisitedConstants.insert(V).second) - return; - - // Look in operands for types. - for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) - if (Value *Op = V->getOperand(i)) - incorporateValue(Op); - } - }; -} // end anonymous namespace - -void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes, - bool OnlyNamed) const { - TypeFinder(StructTypes, OnlyNamed).run(*this); -} diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index c6f3558..5e9a00f 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -464,19 +464,26 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) { void StructType::setName(StringRef Name) { if (Name == getName()) return; - // If this struct already had a name, remove its symbol table entry. - if (SymbolTableEntry) { - getContext().pImpl->NamedStructTypes.erase(getName()); - SymbolTableEntry = 0; - } - + StringMap<StructType *> &SymbolTable = getContext().pImpl->NamedStructTypes; + typedef StringMap<StructType *>::MapEntryTy EntryTy; + + // If this struct already had a name, remove its symbol table entry. Don't + // delete the data yet because it may be part of the new name. + if (SymbolTableEntry) + SymbolTable.remove((EntryTy *)SymbolTableEntry); + // If this is just removing the name, we're done. - if (Name.empty()) + if (Name.empty()) { + if (SymbolTableEntry) { + // Delete the old string data. + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); + SymbolTableEntry = 0; + } return; + } // Look up the entry for the name. - StringMapEntry<StructType*> *Entry = - &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); + EntryTy *Entry = &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name); // While we have a name collision, try a random rename. if (Entry->getValue()) { @@ -497,7 +504,10 @@ void StructType::setName(StringRef Name) { // Okay, we found an entry that isn't used. It's us! Entry->setValue(this); - + + // Delete the old string data. + if (SymbolTableEntry) + ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); SymbolTableEntry = Entry; } diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp new file mode 100644 index 0000000..4de649f --- /dev/null +++ b/lib/VMCore/TypeFinder.cpp @@ -0,0 +1,148 @@ +//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TypeFinder class for the VMCore library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/TypeFinder.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +void TypeFinder::run(const Module &M, bool onlyNamed) { + OnlyNamed = onlyNamed; + + // Get types from global variables. + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + incorporateType(I->getType()); + if (I->hasInitializer()) + incorporateValue(I->getInitializer()); + } + + // Get types from aliases. + for (Module::const_alias_iterator I = M.alias_begin(), + E = M.alias_end(); I != E; ++I) { + incorporateType(I->getType()); + if (const Value *Aliasee = I->getAliasee()) + incorporateValue(Aliasee); + } + + // Get types from functions. + SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; + for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { + incorporateType(FI->getType()); + + // First incorporate the arguments. + for (Function::const_arg_iterator AI = FI->arg_begin(), + AE = FI->arg_end(); AI != AE; ++AI) + incorporateValue(AI); + + for (Function::const_iterator BB = FI->begin(), E = FI->end(); + BB != E;++BB) + for (BasicBlock::const_iterator II = BB->begin(), + E = BB->end(); II != E; ++II) { + const Instruction &I = *II; + + // Incorporate the type of the instruction. + incorporateType(I.getType()); + + // Incorporate non-instruction operand types. (We are incorporating all + // instructions with this loop.) + for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end(); + OI != OE; ++OI) + if (!isa<Instruction>(OI)) + incorporateValue(*OI); + + // Incorporate types hiding in metadata. + I.getAllMetadataOtherThanDebugLoc(MDForInst); + for (unsigned i = 0, e = MDForInst.size(); i != e; ++i) + incorporateMDNode(MDForInst[i].second); + + MDForInst.clear(); + } + } + + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + const NamedMDNode *NMD = I; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + incorporateMDNode(NMD->getOperand(i)); + } +} + +void TypeFinder::clear() { + VisitedConstants.clear(); + VisitedTypes.clear(); + StructTypes.clear(); +} + +/// incorporateType - This method adds the type to the list of used structures +/// if it's not in there already. +void TypeFinder::incorporateType(Type *Ty) { + // Check to see if we're already visited this type. + if (!VisitedTypes.insert(Ty).second) + return; + + // If this is a structure or opaque type, add a name for the type. + if (StructType *STy = dyn_cast<StructType>(Ty)) + if (!OnlyNamed || STy->hasName()) + StructTypes.push_back(STy); + + // Recursively walk all contained types. + for (Type::subtype_iterator I = Ty->subtype_begin(), + E = Ty->subtype_end(); I != E; ++I) + incorporateType(*I); +} + +/// incorporateValue - This method is used to walk operand lists finding types +/// hiding in constant expressions and other operands that won't be walked in +/// other ways. GlobalValues, basic blocks, instructions, and inst operands are +/// all explicitly enumerated. +void TypeFinder::incorporateValue(const Value *V) { + if (const MDNode *M = dyn_cast<MDNode>(V)) + return incorporateMDNode(M); + + if (!isa<Constant>(V) || isa<GlobalValue>(V)) return; + + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Check this type. + incorporateType(V->getType()); + + // If this is an instruction, we incorporate it separately. + if (isa<Instruction>(V)) + return; + + // Look in operands for types. + const User *U = cast<User>(V); + for (Constant::const_op_iterator I = U->op_begin(), + E = U->op_end(); I != E;++I) + incorporateValue(*I); +} + +/// incorporateMDNode - This method is used to walk the operands of an MDNode to +/// find types hiding within. +void TypeFinder::incorporateMDNode(const MDNode *V) { + // Already visited? + if (!VisitedConstants.insert(V).second) + return; + + // Look in operands for types. + for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i) + if (Value *Op = V->getOperand(i)) + incorporateValue(Op); +} diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 9a8e185..d1ca953 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -71,6 +71,10 @@ bool EVT::isExtended512BitVector() const { return isExtendedVector() && getSizeInBits() == 512; } +bool EVT::isExtended1024BitVector() const { + return isExtendedVector() && getSizeInBits() == 1024; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType()); @@ -128,10 +132,12 @@ std::string EVT::getEVTString() const { case MVT::v2i32: return "v2i32"; case MVT::v4i32: return "v4i32"; case MVT::v8i32: return "v8i32"; + case MVT::v16i32: return "v16i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; + case MVT::v16i64: return "v16i64"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; case MVT::v4f32: return "v4f32"; @@ -177,10 +183,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); + case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); + case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 5d51f41..c932d9e 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -400,8 +400,8 @@ void Verifier::visitGlobalValue(GlobalValue &GV) { "Only global arrays can have appending linkage!", GVar); } - Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(), - "linker_private_weak_def_auto can only have default visibility!", + Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(), + "linkonce_odr_auto_hide can only have default visibility!", &GV); } @@ -1093,7 +1093,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) { // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(), + Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(), "Bitcast requires both operands to be pointer or neither", &I); Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I); @@ -1378,6 +1378,15 @@ void Verifier::visitLoadInst(LoadInst &LI) { "Load cannot have Release ordering", &LI); Assert1(LI.getAlignment() != 0, "Atomic load must specify explicit alignment", &LI); + if (!ElTy->isPointerTy()) { + Assert2(ElTy->isIntegerTy(), + "atomic store operand must have integer type!", + &LI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomic store operand must be power-of-two byte-sized integer", + &LI, ElTy); + } } else { Assert1(LI.getSynchScope() == CrossThread, "Non-atomic load cannot have SynchronizationScope specified", &LI); @@ -1444,6 +1453,15 @@ void Verifier::visitStoreInst(StoreInst &SI) { "Store cannot have Acquire ordering", &SI); Assert1(SI.getAlignment() != 0, "Atomic store must specify explicit alignment", &SI); + if (!ElTy->isPointerTy()) { + Assert2(ElTy->isIntegerTy(), + "atomic store operand must have integer type!", + &SI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomic store operand must be power-of-two byte-sized integer", + &SI, ElTy); + } } else { Assert1(SI.getSynchScope() == CrossThread, "Non-atomic store cannot have SynchronizationScope specified", &SI); @@ -1471,6 +1489,13 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { PointerType *PTy = dyn_cast<PointerType>(CXI.getOperand(0)->getType()); Assert1(PTy, "First cmpxchg operand must be a pointer.", &CXI); Type *ElTy = PTy->getElementType(); + Assert2(ElTy->isIntegerTy(), + "cmpxchg operand must have integer type!", + &CXI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "cmpxchg operand must be power-of-two byte-sized integer", + &CXI, ElTy); Assert2(ElTy == CXI.getOperand(1)->getType(), "Expected value type does not match pointer operand type!", &CXI, ElTy); @@ -1488,6 +1513,13 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { PointerType *PTy = dyn_cast<PointerType>(RMWI.getOperand(0)->getType()); Assert1(PTy, "First atomicrmw operand must be a pointer.", &RMWI); Type *ElTy = PTy->getElementType(); + Assert2(ElTy->isIntegerTy(), + "atomicrmw operand must have integer type!", + &RMWI, ElTy); + unsigned Size = ElTy->getPrimitiveSizeInBits(); + Assert2(Size >= 8 && !(Size & (Size - 1)), + "atomicrmw operand must be power-of-two byte-sized integer", + &RMWI, ElTy); Assert2(ElTy == RMWI.getOperand(1)->getType(), "Argument value type does not match pointer operand type!", &RMWI, ElTy); @@ -1536,7 +1568,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // landing pad block may be branched to only by the unwind edge of an invoke. for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()); - Assert1(II && II->getUnwindDest() == BB, + Assert1(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, "Block containing LandingPadInst must be jumped to " "only by the unwind edge of an invoke.", &LPI); } @@ -1575,6 +1607,13 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { Instruction *Op = cast<Instruction>(I.getOperand(i)); + // If the we have an invalid invoke, don't try to compute the dominance. + // We already reject it in the invoke specific checks and the dominance + // computation doesn't handle multiple edges. + if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) { + if (II->getNormalDest() == II->getUnwindDest()) + return; + } const Use &U = I.getOperandUse(i); Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, U), |
